#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.304 1999/10/13 20:37:20 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

extern int MatSetUpMultiply_MPIAIJ(Mat);
extern int DisAssemble_MPIAIJ(Mat);
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
   at a slightly higher hash table cost; without it, it is not scalable
   (each processor has an order-N integer array) but it is fast to access.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
len*sizeof(Scalar));CHKERRQ(ierr); \ 93 /* free up old matrix storage */ \ 94 \ 95 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 96 if (!a->singlemalloc) { \ 97 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 98 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 99 } \ 100 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 101 a->singlemalloc = 1; \ 102 \ 103 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 104 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 105 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 106 a->maxnz += CHUNKSIZE; \ 107 a->reallocs++; \ 108 } \ 109 N = nrow++ - 1; a->nz++; \ 110 /* shift up all the later entries in this row */ \ 111 for ( ii=N; ii>=_i; ii-- ) { \ 112 rp[ii+1] = rp[ii]; \ 113 ap[ii+1] = ap[ii]; \ 114 } \ 115 rp[_i] = col1; \ 116 ap[_i] = value; \ 117 a_noinsert: ; \ 118 ailen[row] = nrow; \ 119 } 120 121 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 122 { \ 123 \ 124 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 125 rmax = bimax[row]; nrow = bilen[row]; \ 126 col1 = col - shift; \ 127 \ 128 low = 0; high = nrow; \ 129 while (high-low > 5) { \ 130 t = (low+high)/2; \ 131 if (rp[t] > col) high = t; \ 132 else low = t; \ 133 } \ 134 for ( _i=0; _i<nrow; _i++ ) { \ 135 if (rp[_i] > col1) break; \ 136 if (rp[_i] == col1) { \ 137 if (addv == ADD_VALUES) ap[_i] += value; \ 138 else ap[_i] = value; \ 139 goto b_noinsert; \ 140 } \ 141 } \ 142 if (nonew == 1) goto b_noinsert; \ 143 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 144 if (nrow >= rmax) { \ 145 /* there is no extra room in row, therefore enlarge */ \ 146 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 147 Scalar *new_a; \ 148 \ 149 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 150 \ 151 /* malloc new storage space */ \ 152 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 153 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 154 new_j = (int *) (new_a + new_nz); \ 155 new_i = new_j + new_nz; \ 156 \ 157 /* copy over old data into new slots */ \ 158 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 159 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 160 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 161 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 162 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 163 len*sizeof(int));CHKERRQ(ierr); \ 164 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 165 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 166 len*sizeof(Scalar));CHKERRQ(ierr); \ 167 /* free up old matrix storage */ \ 168 \ 169 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 170 if (!b->singlemalloc) { \ 171 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 172 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 173 } \ 174 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 175 b->singlemalloc = 1; \ 176 \ 177 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 178 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 179 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 180 b->maxnz += CHUNKSIZE; \ 181 b->reallocs++; \ 182 } \ 183 N = nrow++ - 1; b->nz++; \ 184 /* shift up all the later entries in this row */ \ 185 for ( ii=N; ii>=_i; ii-- ) { \ 186 rp[ii+1] = rp[ii]; \ 187 ap[ii+1] = ap[ii]; \ 188 } \ 189 rp[_i] = col1; \ 190 ap[_i] = value; \ 191 b_noinsert: ; \ 192 bilen[row] = nrow; \ 193 } 194 
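
/*
   Note: MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() above are
   inlined (macro) versions of MatSetValues_SeqAIJ() for the diagonal (A) and
   off-diagonal (B) blocks of the parallel matrix. Each inserts or adds one value into
   the given local row, keeping the row sorted by column, and when the row runs out of
   preallocated space it enlarges the block's storage in CHUNKSIZE increments.
*/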
195 #undef __FUNC__ 196 #define __FUNC__ "MatSetValues_MPIAIJ" 197 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 198 { 199 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 200 Scalar value; 201 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 202 int cstart = aij->cstart, cend = aij->cend,row,col; 203 int roworiented = aij->roworiented; 204 205 /* Some Variables required in the macro */ 206 Mat A = aij->A; 207 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 208 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 209 Scalar *aa = a->a; 210 211 Mat B = aij->B; 212 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 213 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 214 Scalar *ba = b->a; 215 216 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 217 int nonew = a->nonew,shift = a->indexshift; 218 Scalar *ap; 219 220 PetscFunctionBegin; 221 for ( i=0; i<m; i++ ) { 222 if (im[i] < 0) continue; 223 #if defined(PETSC_USE_BOPT_g) 224 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 225 #endif 226 if (im[i] >= rstart && im[i] < rend) { 227 row = im[i] - rstart; 228 for ( j=0; j<n; j++ ) { 229 if (in[j] >= cstart && in[j] < cend){ 230 col = in[j] - cstart; 231 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 232 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 233 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 234 } 235 else if (in[j] < 0) continue; 236 #if defined(PETSC_USE_BOPT_g) 237 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 238 #endif 239 else { 240 if (mat->was_assembled) { 241 if (!aij->colmap) { 242 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 243 } 244 #if defined (PETSC_USE_CTABLE) 245 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 246 col--; 247 #else 248 col = aij->colmap[in[j]] - 1; 249 #endif 250 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 251 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 252 col = in[j]; 253 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 254 B = aij->B; 255 b = (Mat_SeqAIJ *) B->data; 256 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 257 ba = b->a; 258 } 259 } else col = in[j]; 260 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 261 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } 264 } 265 } else { 266 if (!aij->donotstash) { 267 if (roworiented) { 268 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 269 } else { 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNC__ 279 #define __FUNC__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 283 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 284 int cstart = aij->cstart, cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for ( i=0; i<m; i++ ) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 289 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for ( j=0; j<n; j++ ) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 294 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column 
too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNC__ 322 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNC__ 349 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 353 int i,j,rstart,ncols,n,ierr,flg; 354 int *row,*col,other_disassembled; 355 Scalar *val; 356 InsertMode addv = mat->insertmode; 357 358 PetscFunctionBegin; 359 if (!aij->donotstash) { 360 while (1) { 361 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 362 if (!flg) break; 363 364 for ( i=0; i<n; ) { 365 /* Now identify the consecutive vals belonging to the same row */ 366 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 367 if (j < n) ncols = j-i; 368 else ncols = n-i; 369 /* Now assemble all these values with a single function call */ 370 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 371 i = j; 372 } 373 } 374 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 375 } 376 377 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 378 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 379 380 /* determine if any processor has disassembled, if so we must 381 also disassemble ourselfs, in order that we may reassemble. 
*/ 382 /* 383 if nonzero structure of submatrix B cannot change then we know that 384 no processor disassembled thus we can skip this stuff 385 */ 386 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 387 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 388 if (mat->was_assembled && !other_disassembled) { 389 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNC__ 407 #define __FUNC__ "MatZeroEntries_MPIAIJ" 408 int MatZeroEntries_MPIAIJ(Mat A) 409 { 410 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 411 int ierr; 412 413 PetscFunctionBegin; 414 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 415 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNC__ 420 #define __FUNC__ "MatZeroRows_MPIAIJ" 421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 424 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 425 int *procs,*nprocs,j,found,idx,nsends,*work,row; 426 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 427 int *rvalues,tag = A->tag,count,base,slen,n,*source; 428 int *lens,imdex,*lrows,*values,rstart=l->rstart; 429 MPI_Comm comm = A->comm; 430 MPI_Request *send_waits,*recv_waits; 431 MPI_Status recv_status,*send_status; 432 IS istmp; 433 434 PetscFunctionBegin; 435 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 436 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 437 438 /* first count number of contributors to each processor */ 439 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 440 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 441 procs = nprocs + size; 442 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 443 for ( i=0; i<N; i++ ) { 444 idx = rows[i]; 445 found = 0; 446 for ( j=0; j<size; j++ ) { 447 if (idx >= owners[j] && idx < owners[j+1]) { 448 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 449 } 450 } 451 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 452 } 453 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 454 455 /* inform other processors of number of messages and max length*/ 456 work = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work); 457 ierr = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 458 nrecvs = work[size+rank]; 459 nmax = work[rank]; 460 ierr = PetscFree(work);CHKERRQ(ierr); 461 462 /* post receives: */ 463 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 464 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 465 for ( i=0; i<nrecvs; i++ ) { 466 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 467 } 468 469 /* do sends: 470 1) starts[i] gives the starting index in svalues for stuff going to 471 the ith processor 472 */ 473 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 474 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 475 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 476 
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry, allocating
     new memory, thus calling lots of mallocs and slowing things down.
533 534 Contributed by: Mathew Knepley 535 */ 536 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 537 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 538 if (diag && (l->A->M == l->A->N)) { 539 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 540 } else if (diag) { 541 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 542 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 543 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 544 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 545 } 546 for ( i = 0; i < slen; i++ ) { 547 row = lrows[i] + rstart; 548 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 549 } 550 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 } else { 553 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 554 } 555 ierr = ISDestroy(istmp);CHKERRQ(ierr); 556 ierr = PetscFree(lrows);CHKERRQ(ierr); 557 558 /* wait on sends */ 559 if (nsends) { 560 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 561 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 562 ierr = PetscFree(send_status);CHKERRQ(ierr); 563 } 564 ierr = PetscFree(send_waits);CHKERRQ(ierr); 565 ierr = PetscFree(svalues);CHKERRQ(ierr); 566 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNC__ 571 #define __FUNC__ "MatMult_MPIAIJ" 572 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 573 { 574 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 575 int ierr,nt; 576 577 PetscFunctionBegin; 578 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 579 if (nt != a->n) { 580 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 581 } 582 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 584 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNC__ 590 #define __FUNC__ "MatMultAdd_MPIAIJ" 591 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 594 int ierr; 595 596 PetscFunctionBegin; 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNC__ 605 #define __FUNC__ "MatMultTrans_MPIAIJ" 606 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 609 int ierr; 610 611 PetscFunctionBegin; 612 /* do nondiagonal part */ 613 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 614 /* send it on its way */ 615 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 /* do local part */ 617 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 618 /* receive remote parts: note this assumes the values are not actually */ 619 /* inserted in yy until the next line, which is true for my implementation*/ 620 /* but is not perhaps always true. 
*/ 621 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNC__ 626 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 627 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 628 { 629 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 630 int ierr; 631 632 PetscFunctionBegin; 633 /* do nondiagonal part */ 634 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 635 /* send it on its way */ 636 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 /* do local part */ 638 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 639 /* receive remote parts: note this assumes the values are not actually */ 640 /* inserted in yy until the next line, which is true for my implementation*/ 641 /* but is not perhaps always true. */ 642 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 643 PetscFunctionReturn(0); 644 } 645 646 /* 647 This only works correctly for square matrices where the subblock A->A is the 648 diagonal block 649 */ 650 #undef __FUNC__ 651 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 652 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 653 { 654 int ierr; 655 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 656 657 PetscFunctionBegin; 658 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 659 if (a->rstart != a->cstart || a->rend != a->cend) { 660 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 661 } 662 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNC__ 667 #define __FUNC__ "MatScale_MPIAIJ" 668 int MatScale_MPIAIJ(Scalar *aa,Mat A) 669 { 670 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 671 int ierr; 672 673 PetscFunctionBegin; 674 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 675 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNC__ 680 #define __FUNC__ "MatDestroy_MPIAIJ" 681 int MatDestroy_MPIAIJ(Mat mat) 682 { 683 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 684 int ierr; 685 686 PetscFunctionBegin; 687 688 if (mat->mapping) { 689 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 690 } 691 if (mat->bmapping) { 692 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 693 } 694 if (mat->rmap) { 695 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 696 } 697 if (mat->cmap) { 698 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 699 } 700 #if defined(PETSC_USE_LOG) 701 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 702 #endif 703 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 704 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 705 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 706 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 707 #if defined (PETSC_USE_CTABLE) 708 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 709 #else 710 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 711 #endif 712 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 713 if (aij->lvec) VecDestroy(aij->lvec); 714 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 715 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 716 ierr = PetscFree(aij);CHKERRQ(ierr); 717 PLogObjectDestroy(mat); 718 PetscHeaderDestroy(mat); 719 PetscFunctionReturn(0); 720 } 721 722 #undef __FUNC__ 723 #define __FUNC__ "MatView_MPIAIJ_Binary" 724 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 725 { 726 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 727 
int ierr; 728 729 PetscFunctionBegin; 730 if (aij->size == 1) { 731 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 732 } 733 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 734 PetscFunctionReturn(0); 735 } 736 737 #undef __FUNC__ 738 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 739 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 740 { 741 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 742 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 743 int ierr, format,shift = C->indexshift,rank = aij->rank,size = aij->size; 744 PetscTruth isdraw,isascii; 745 746 PetscFunctionBegin; 747 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 748 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 749 if (isascii) { 750 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 751 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 752 MatInfo info; 753 int flg; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 756 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 757 if (flg) { 758 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 759 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 760 } else { 761 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 762 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 763 } 764 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 765 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 766 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 767 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 768 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 769 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 772 PetscFunctionReturn(0); 773 } 774 } else if (isdraw) { 775 Draw draw; 776 PetscTruth isnull; 777 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 778 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 779 } 780 781 if (size == 1) { 782 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 783 } else { 784 /* assemble the entire matrix onto first processor. 
*/ 785 Mat A; 786 Mat_SeqAIJ *Aloc; 787 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 788 Scalar *a; 789 790 if (!rank) { 791 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 792 } else { 793 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 794 } 795 PLogObjectParent(mat,A); 796 797 /* copy over the A part */ 798 Aloc = (Mat_SeqAIJ*) aij->A->data; 799 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 800 row = aij->rstart; 801 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 802 for ( i=0; i<m; i++ ) { 803 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 804 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 805 } 806 aj = Aloc->j; 807 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 808 809 /* copy over the B part */ 810 Aloc = (Mat_SeqAIJ*) aij->B->data; 811 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 812 row = aij->rstart; 813 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols); 814 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 815 for ( i=0; i<m; i++ ) { 816 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 817 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 818 } 819 ierr = PetscFree(ct);CHKERRQ(ierr); 820 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 821 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 /* 823 Everyone has to call to draw the matrix since the graphics waits are 824 synchronized across all processors that share the Draw object 825 */ 826 if (!rank) { 827 Viewer sviewer; 828 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 829 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 830 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 831 } 832 ierr = MatDestroy(A);CHKERRQ(ierr); 833 } 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNC__ 838 #define __FUNC__ "MatView_MPIAIJ" 839 int MatView_MPIAIJ(Mat mat,Viewer viewer) 840 { 841 int ierr; 842 PetscTruth isascii,isdraw,issocket,isbinary; 843 844 PetscFunctionBegin; 845 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 846 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 847 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 848 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 849 if (isascii || isdraw || isbinary || issocket) { 850 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 851 } else { 852 SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 853 } 854 PetscFunctionReturn(0); 855 } 856 857 /* 858 This has to provide several versions. 859 860 2) a) use only local smoothing updating outer values only once. 861 b) local smoothing updating outer values each inner iteration 862 3) color updating out values betwen colors. 
863 */ 864 #undef __FUNC__ 865 #define __FUNC__ "MatRelax_MPIAIJ" 866 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 867 double fshift,int its,Vec xx) 868 { 869 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 870 Mat AA = mat->A, BB = mat->B; 871 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 872 Scalar *b,*x,*xs,*ls,d,*v,sum; 873 int ierr,*idx, *diag; 874 int n = mat->n, m = mat->m, i,shift = A->indexshift; 875 876 PetscFunctionBegin; 877 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 878 diag = A->diag; 879 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 880 if (flag & SOR_ZERO_INITIAL_GUESS) { 881 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } 884 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 885 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 886 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 887 if (xx != bb) { 888 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 889 } else { 890 b = x; 891 } 892 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 893 xs = x + shift; /* shift by one for index start of 1 */ 894 ls = ls + shift; 895 while (its--) { 896 /* go down through the rows */ 897 for ( i=0; i<m; i++ ) { 898 n = A->i[i+1] - A->i[i]; 899 PLogFlops(4*n+3); 900 idx = A->j + A->i[i] + shift; 901 v = A->a + A->i[i] + shift; 902 sum = b[i]; 903 SPARSEDENSEMDOT(sum,xs,v,idx,n); 904 d = fshift + A->a[diag[i]+shift]; 905 n = B->i[i+1] - B->i[i]; 906 idx = B->j + B->i[i] + shift; 907 v = B->a + B->i[i] + shift; 908 SPARSEDENSEMDOT(sum,ls,v,idx,n); 909 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 910 } 911 /* come up through the rows */ 912 for ( i=m-1; i>-1; i-- ) { 913 n = A->i[i+1] - A->i[i]; 914 PLogFlops(4*n+3) 915 idx = A->j + A->i[i] + shift; 916 v = A->a + A->i[i] + shift; 917 sum = b[i]; 918 SPARSEDENSEMDOT(sum,xs,v,idx,n); 919 d = fshift + A->a[diag[i]+shift]; 920 n = B->i[i+1] - B->i[i]; 921 idx = B->j + B->i[i] + shift; 922 v = B->a + B->i[i] + shift; 923 SPARSEDENSEMDOT(sum,ls,v,idx,n); 924 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 925 } 926 } 927 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 928 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 929 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 930 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 931 if (flag & SOR_ZERO_INITIAL_GUESS) { 932 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 936 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 937 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 938 if (xx != bb) { 939 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 940 } else { 941 b = x; 942 } 943 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 944 xs = x + shift; /* shift by one for index start of 1 */ 945 ls = ls + shift; 946 while (its--) { 947 for ( i=0; i<m; i++ ) { 948 n = A->i[i+1] - A->i[i]; 949 PLogFlops(4*n+3); 950 idx = A->j + A->i[i] + shift; 951 v = A->a + A->i[i] + shift; 952 sum = b[i]; 953 SPARSEDENSEMDOT(sum,xs,v,idx,n); 954 d = fshift + A->a[diag[i]+shift]; 955 n = B->i[i+1] - B->i[i]; 956 idx = B->j + B->i[i] + shift; 957 v = B->a + B->i[i] + shift; 958 SPARSEDENSEMDOT(sum,ls,v,idx,n); 959 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 960 } 961 } 962 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 963 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 964 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 965 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 966 if (flag & SOR_ZERO_INITIAL_GUESS) { 967 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 971 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 972 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 973 if (xx != bb) { 974 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 975 } else { 976 b = x; 977 } 978 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 979 xs = x + shift; /* shift by one for index start of 1 */ 980 ls = ls + shift; 981 while (its--) { 982 for ( i=m-1; i>-1; i-- ) { 983 n = A->i[i+1] - A->i[i]; 984 PLogFlops(4*n+3); 985 idx = A->j + A->i[i] + shift; 986 v = A->a + A->i[i] + shift; 987 sum = b[i]; 988 SPARSEDENSEMDOT(sum,xs,v,idx,n); 989 d = fshift + A->a[diag[i]+shift]; 990 n = B->i[i+1] - B->i[i]; 991 idx = B->j + B->i[i] + shift; 992 v = B->a + B->i[i] + shift; 993 SPARSEDENSEMDOT(sum,ls,v,idx,n); 994 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 995 } 996 } 997 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 998 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 999 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1000 } else { 1001 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1002 } 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNC__ 1007 #define __FUNC__ "MatGetInfo_MPIAIJ" 1008 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1009 { 1010 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1011 Mat A = mat->A, B = mat->B; 1012 int ierr; 1013 double isend[5], irecv[5]; 1014 1015 PetscFunctionBegin; 1016 info->block_size = 1.0; 1017 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1018 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1019 isend[3] = info->memory; isend[4] = info->mallocs; 1020 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1021 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1022 isend[3] += info->memory; isend[4] += info->mallocs; 1023 if (flag == MAT_LOCAL) { 1024 info->nz_used = isend[0]; 1025 info->nz_allocated = isend[1]; 1026 info->nz_unneeded = isend[2]; 1027 info->memory = isend[3]; 1028 info->mallocs = isend[4]; 1029 } else if (flag == MAT_GLOBAL_MAX) { 1030 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1031 info->nz_used = irecv[0]; 1032 info->nz_allocated = irecv[1]; 1033 info->nz_unneeded = irecv[2]; 1034 info->memory = irecv[3]; 1035 info->mallocs = irecv[4]; 1036 } else if (flag == MAT_GLOBAL_SUM) { 1037 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1038 info->nz_used = irecv[0]; 1039 info->nz_allocated = irecv[1]; 1040 info->nz_unneeded = irecv[2]; 1041 info->memory = irecv[3]; 1042 info->mallocs = irecv[4]; 1043 } 1044 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1045 info->fill_ratio_needed = 0; 1046 info->factor_mallocs = 0; 1047 info->rows_global = (double)mat->M; 1048 info->columns_global = (double)mat->N; 1049 info->rows_local = (double)mat->m; 1050 info->columns_local = (double)mat->N; 1051 1052 PetscFunctionReturn(0); 1053 } 
1054 1055 #undef __FUNC__ 1056 #define __FUNC__ "MatSetOption_MPIAIJ" 1057 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1060 int ierr; 1061 1062 PetscFunctionBegin; 1063 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1064 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1065 op == MAT_COLUMNS_UNSORTED || 1066 op == MAT_COLUMNS_SORTED || 1067 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1068 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1069 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1070 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1071 } else if (op == MAT_ROW_ORIENTED) { 1072 a->roworiented = 1; 1073 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1074 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1075 } else if (op == MAT_ROWS_SORTED || 1076 op == MAT_ROWS_UNSORTED || 1077 op == MAT_SYMMETRIC || 1078 op == MAT_STRUCTURALLY_SYMMETRIC || 1079 op == MAT_YES_NEW_DIAGONALS) 1080 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1081 else if (op == MAT_COLUMN_ORIENTED) { 1082 a->roworiented = 0; 1083 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1084 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1085 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1086 a->donotstash = 1; 1087 } else if (op == MAT_NO_NEW_DIAGONALS){ 1088 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1089 } else { 1090 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1091 } 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNC__ 1096 #define __FUNC__ "MatGetSize_MPIAIJ" 1097 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1098 { 1099 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1100 1101 PetscFunctionBegin; 1102 if (m) *m = mat->M; 1103 if (n) *n = mat->N; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNC__ 1108 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1109 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1110 { 1111 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1112 1113 PetscFunctionBegin; 1114 if (m) *m = mat->m; 1115 if (n) *n = mat->n; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNC__ 1120 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1121 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1122 { 1123 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1124 1125 PetscFunctionBegin; 1126 *m = mat->rstart; *n = mat->rend; 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNC__ 1131 #define __FUNC__ "MatGetRow_MPIAIJ" 1132 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1133 { 1134 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1135 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1136 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1137 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1138 int *cmap, *idx_p; 1139 1140 PetscFunctionBegin; 1141 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1142 mat->getrowactive = PETSC_TRUE; 1143 1144 if (!mat->rowvalues && (idx || v)) { 1145 /* 1146 allocate enough space to hold information from the longest row. 
1147 */ 1148 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1149 int max = 1,m = mat->m,tmp; 1150 for ( i=0; i<m; i++ ) { 1151 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1152 if (max < tmp) { max = tmp; } 1153 } 1154 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1155 mat->rowindices = (int *) (mat->rowvalues + max); 1156 } 1157 1158 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1159 lrow = row - rstart; 1160 1161 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1162 if (!v) {pvA = 0; pvB = 0;} 1163 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1164 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1165 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1166 nztot = nzA + nzB; 1167 1168 cmap = mat->garray; 1169 if (v || idx) { 1170 if (nztot) { 1171 /* Sort by increasing column numbers, assuming A and B already sorted */ 1172 int imark = -1; 1173 if (v) { 1174 *v = v_p = mat->rowvalues; 1175 for ( i=0; i<nzB; i++ ) { 1176 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1177 else break; 1178 } 1179 imark = i; 1180 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1181 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1182 } 1183 if (idx) { 1184 *idx = idx_p = mat->rowindices; 1185 if (imark > -1) { 1186 for ( i=0; i<imark; i++ ) { 1187 idx_p[i] = cmap[cworkB[i]]; 1188 } 1189 } else { 1190 for ( i=0; i<nzB; i++ ) { 1191 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1192 else break; 1193 } 1194 imark = i; 1195 } 1196 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1197 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1198 } 1199 } else { 1200 if (idx) *idx = 0; 1201 if (v) *v = 0; 1202 } 1203 } 1204 *nz = nztot; 1205 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1206 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNC__ 1211 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1212 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1213 { 1214 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1215 1216 PetscFunctionBegin; 1217 if (aij->getrowactive == PETSC_FALSE) { 1218 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1219 } 1220 aij->getrowactive = PETSC_FALSE; 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNC__ 1225 #define __FUNC__ "MatNorm_MPIAIJ" 1226 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1227 { 1228 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1229 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1230 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1231 double sum = 0.0; 1232 Scalar *v; 1233 1234 PetscFunctionBegin; 1235 if (aij->size == 1) { 1236 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1237 } else { 1238 if (type == NORM_FROBENIUS) { 1239 v = amat->a; 1240 for (i=0; i<amat->nz; i++ ) { 1241 #if defined(PETSC_USE_COMPLEX) 1242 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1243 #else 1244 sum += (*v)*(*v); v++; 1245 #endif 1246 } 1247 v = bmat->a; 1248 for (i=0; i<bmat->nz; i++ ) { 1249 #if defined(PETSC_USE_COMPLEX) 1250 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1251 #else 1252 sum += (*v)*(*v); v++; 1253 #endif 1254 } 1255 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1256 *norm = sqrt(*norm); 1257 } else if (type == NORM_1) { /* max 
column norm */ 1258 double *tmp, *tmp2; 1259 int *jj, *garray = aij->garray; 1260 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp); 1261 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2); 1262 ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr); 1263 *norm = 0.0; 1264 v = amat->a; jj = amat->j; 1265 for ( j=0; j<amat->nz; j++ ) { 1266 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1267 } 1268 v = bmat->a; jj = bmat->j; 1269 for ( j=0; j<bmat->nz; j++ ) { 1270 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1271 } 1272 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1273 for ( j=0; j<aij->N; j++ ) { 1274 if (tmp2[j] > *norm) *norm = tmp2[j]; 1275 } 1276 ierr = PetscFree(tmp);CHKERRQ(ierr); 1277 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1278 } else if (type == NORM_INFINITY) { /* max row norm */ 1279 double ntemp = 0.0; 1280 for ( j=0; j<amat->m; j++ ) { 1281 v = amat->a + amat->i[j] + shift; 1282 sum = 0.0; 1283 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1284 sum += PetscAbsScalar(*v); v++; 1285 } 1286 v = bmat->a + bmat->i[j] + shift; 1287 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1288 sum += PetscAbsScalar(*v); v++; 1289 } 1290 if (sum > ntemp) ntemp = sum; 1291 } 1292 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1293 } else { 1294 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1295 } 1296 } 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNC__ 1301 #define __FUNC__ "MatTranspose_MPIAIJ" 1302 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1303 { 1304 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1305 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1306 int ierr,shift = Aloc->indexshift; 1307 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1308 Mat B; 1309 Scalar *array; 1310 1311 PetscFunctionBegin; 1312 if (matout == PETSC_NULL && M != N) { 1313 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1314 } 1315 1316 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1317 1318 /* copy over the A part */ 1319 Aloc = (Mat_SeqAIJ*) a->A->data; 1320 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1321 row = a->rstart; 1322 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1323 for ( i=0; i<m; i++ ) { 1324 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1325 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1326 } 1327 aj = Aloc->j; 1328 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1329 1330 /* copy over the B part */ 1331 Aloc = (Mat_SeqAIJ*) a->B->data; 1332 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1333 row = a->rstart; 1334 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols); 1335 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1336 for ( i=0; i<m; i++ ) { 1337 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1338 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1339 } 1340 ierr = PetscFree(ct);CHKERRQ(ierr); 1341 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1342 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1343 if (matout != PETSC_NULL) { 1344 *matout = B; 1345 } else { 1346 PetscOps *Abops; 1347 struct _MatOps *Aops; 1348 1349 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1350 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1351 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1352 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1353 #if defined (PETSC_USE_CTABLE) 1354 if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);} 1355 #else 1356 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1357 #endif 1358 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1359 if (a->lvec) VecDestroy(a->lvec); 1360 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1361 ierr = PetscFree(a);CHKERRQ(ierr); 1362 1363 /* 1364 This is horrible, horrible code. We need to keep the 1365 A pointers for the bops and ops but copy everything 1366 else from C. 1367 */ 1368 Abops = A->bops; 1369 Aops = A->ops; 1370 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1371 A->bops = Abops; 1372 A->ops = Aops; 1373 PetscHeaderDestroy(B); 1374 } 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNC__ 1379 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1380 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1381 { 1382 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1383 Mat a = aij->A, b = aij->B; 1384 int ierr,s1,s2,s3; 1385 1386 PetscFunctionBegin; 1387 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1388 if (rr) { 1389 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1390 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1391 /* Overlap communication with computation. */ 1392 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1393 } 1394 if (ll) { 1395 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1396 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1397 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1398 } 1399 /* scale the diagonal block */ 1400 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1401 1402 if (rr) { 1403 /* Do a scatter end and then right scale the off-diagonal block */ 1404 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1405 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1406 } 1407 1408 PetscFunctionReturn(0); 1409 } 1410 1411 1412 #undef __FUNC__ 1413 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1414 int MatPrintHelp_MPIAIJ(Mat A) 1415 { 1416 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1417 int ierr; 1418 1419 PetscFunctionBegin; 1420 if (!a->rank) { 1421 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1422 } 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNC__ 1427 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1428 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1429 { 1430 PetscFunctionBegin; 1431 *bs = 1; 1432 PetscFunctionReturn(0); 1433 } 1434 #undef __FUNC__ 1435 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1436 int MatSetUnfactored_MPIAIJ(Mat A) 1437 { 1438 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1439 int ierr; 1440 1441 PetscFunctionBegin; 1442 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNC__ 1447 #define __FUNC__ "MatEqual_MPIAIJ" 1448 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1449 { 1450 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1451 Mat a, b, c, d; 1452 PetscTruth flg; 1453 int ierr; 1454 1455 PetscFunctionBegin; 1456 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1457 a = matA->A; b = matA->B; 1458 c = matB->A; d = matB->B; 1459 1460 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1461 if (flg == PETSC_TRUE) { 1462 ierr 
= MatEqual(b, d, &flg);CHKERRQ(ierr); 1463 } 1464 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNC__ 1469 #define __FUNC__ "MatCopy_MPIAIJ" 1470 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1471 { 1472 int ierr; 1473 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1474 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1475 1476 PetscFunctionBegin; 1477 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1478 /* because of the column compression in the off-processor part of the matrix a->B, 1479 the number of columns in a->B and b->B may be different, hence we cannot call 1480 the MatCopy() directly on the two parts. If need be, we can provide a more 1481 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1482 then copying the submatrices */ 1483 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1484 } else { 1485 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1486 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1487 } 1488 PetscFunctionReturn(0); 1489 } 1490 1491 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1492 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1493 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1494 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1495 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1496 1497 /* -------------------------------------------------------------------*/ 1498 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1499 MatGetRow_MPIAIJ, 1500 MatRestoreRow_MPIAIJ, 1501 MatMult_MPIAIJ, 1502 MatMultAdd_MPIAIJ, 1503 MatMultTrans_MPIAIJ, 1504 MatMultTransAdd_MPIAIJ, 1505 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 0, 1511 MatRelax_MPIAIJ, 1512 MatTranspose_MPIAIJ, 1513 MatGetInfo_MPIAIJ, 1514 MatEqual_MPIAIJ, 1515 MatGetDiagonal_MPIAIJ, 1516 MatDiagonalScale_MPIAIJ, 1517 MatNorm_MPIAIJ, 1518 MatAssemblyBegin_MPIAIJ, 1519 MatAssemblyEnd_MPIAIJ, 1520 0, 1521 MatSetOption_MPIAIJ, 1522 MatZeroEntries_MPIAIJ, 1523 MatZeroRows_MPIAIJ, 1524 0, 1525 0, 1526 0, 1527 0, 1528 MatGetSize_MPIAIJ, 1529 MatGetLocalSize_MPIAIJ, 1530 MatGetOwnershipRange_MPIAIJ, 1531 0, 1532 0, 1533 0, 1534 0, 1535 MatDuplicate_MPIAIJ, 1536 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 MatGetSubMatrices_MPIAIJ, 1542 MatIncreaseOverlap_MPIAIJ, 1543 MatGetValues_MPIAIJ, 1544 MatCopy_MPIAIJ, 1545 MatPrintHelp_MPIAIJ, 1546 MatScale_MPIAIJ, 1547 0, 1548 0, 1549 0, 1550 MatGetBlockSize_MPIAIJ, 1551 0, 1552 0, 1553 0, 1554 0, 1555 MatFDColoringCreate_MPIAIJ, 1556 0, 1557 MatSetUnfactored_MPIAIJ, 1558 0, 1559 0, 1560 MatGetSubMatrix_MPIAIJ, 1561 0, 1562 0, 1563 MatGetMaps_Petsc}; 1564 1565 /* ----------------------------------------------------------------------------------------*/ 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNC__ 1569 #define __FUNC__ "MatStoreValues_MPIAIJ" 1570 int MatStoreValues_MPIAIJ(Mat mat) 1571 { 1572 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1573 int ierr; 1574 1575 PetscFunctionBegin; 1576 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1577 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 EXTERN_C_END 1581 1582 EXTERN_C_BEGIN 1583 #undef __FUNC__ 1584 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1585 int MatRetrieveValues_MPIAIJ(Mat mat) 1586 { 1587 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1588 int ierr; 1589 1590 PetscFunctionBegin; 1591 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1592 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1593 
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (the same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row),
           or PETSC_NULL if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (the same value is used for all local rows)
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row), or PETSC_NULL if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage. That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero. See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
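
   As an illustration only, a typical calling sequence for a square matrix has the
   shape sketched below; the local size m, the preallocation counts 5 and 2, and the
   use of PETSC_COMM_WORLD are placeholders, not recommendations, and a real
   application would obtain them from its own discretization:

.vb
      Mat      A;
      MPI_Comm comm = PETSC_COMM_WORLD;
      int      ierr,m = 3;               /* number of locally owned rows */

      ierr = MatCreateMPIAIJ(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                             5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      /* ... insert entries with MatSetValues(), using global indices ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve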

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
        (defaults to using the SeqAIJ format on one processor)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M' and 'N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   the submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 stores [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.
   This translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i, size, flag1 = 0, flag2 = 0;

  PetscFunctionBegin;

  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
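  /*
     The allgathers and prefix sums that follow build the ownership tables:
     rowners[p] ends up holding the global index of the first row owned by
     process p, and rowners[size] equals M.  For the 8x8 example in the man
     page above, with local sizes m = 3,3,2 on 3 processes, rowners becomes
     {0,3,6,8}, so process 1 owns rows rstart=3 up to (but not including)
     rend=6.  The cowners table is built the same way for the columns.
  */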
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                    "MatStoreValues_MPIAIJ",
                                    (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                    "MatRetrieveValues_MPIAIJ",
                                    (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIAIJ",
                                    (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j, rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a sequential (SeqAIJ)
  copy on each processor, and then the final parallel result obtained by concatenating
  those local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr, i, m, n, rstart, row, rend, nz, *cwork, size, rank, j;
  Mat        *local, M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of nonzeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
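    /*
       The single allocation above holds both length arrays: dlens (the first m
       entries) counts, for each local row, the nonzeros whose new global column
       lies in [rstart,rend), i.e. the DIAGONAL block, while olens (the next m
       entries) counts the remaining OFF-DIAGONAL nonzeros.  These counts give
       MatCreateMPIAIJ() below an exact preallocation for the new parallel matrix.
    */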
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;  jj += nz;
    vwork = aa;  aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
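
/*
   Usage sketch (illustrative only, not used by this file): the MatCreateMPIAIJ()
   man page earlier in this file describes the preallocation arguments and the
   row-by-row assembly model.  The fragment below assembles a simple tridiagonal
   matrix of global size 8 using scalar preallocation (d_nz=3, o_nz=1, chosen here
   only for illustration); the variable names are hypothetical and PETSc
   initialization, error handling beyond CHKERRQ(), and cleanup are assumed to be
   done by the caller.

      Mat    A;
      int    ierr,i,ncols,rstart,rend,cols[3];
      Scalar vals[3];

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,
                             3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ncols = 0;
        if (i > 0) {cols[ncols] = i-1; vals[ncols++] = -1.0;}
        cols[ncols] = i; vals[ncols++] = 2.0;
        if (i < 7) {cols[ncols] = i+1; vals[ncols++] = -1.0;}
        ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/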