1 /*$Id: mpiaij.c,v 1.304 1999/10/13 20:37:20 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 extern int MatSetUpMultiply_MPIAIJ(Mat); 8 extern int DisAssemble_MPIAIJ(Mat); 9 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 10 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 12 extern int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 32 for ( i=0; i<n; i++ ){ 33 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for ( _i=0; _i<nrow; _i++ ) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 71 Scalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 77 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 78 new_j = (int *) (new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 83 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(Scalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = 
PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = 1; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for ( ii=N; ii>=_i; ii-- ) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for ( _i=0; _i<nrow; _i++ ) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 152 new_j = (int *) (new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 157 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(Scalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = 1; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for ( ii=N; ii>=_i; ii-- ) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNC__ 194 #define __FUNC__ "MatSetValues_MPIAIJ" 195 int 
MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 198 Scalar value; 199 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 200 int cstart = aij->cstart, cend = aij->cend,row,col; 201 int roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 206 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 207 Scalar *aa = a->a; 208 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 211 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 212 Scalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 Scalar *ap; 217 218 PetscFunctionBegin; 219 for ( i=0; i<m; i++ ) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for ( j=0; j<n; j++ ) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ *) B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNC__ 277 #define __FUNC__ "MatGetValues_MPIAIJ" 278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 279 { 280 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 281 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 282 int cstart = aij->cstart, cend = aij->cend,row,col; 283 284 PetscFunctionBegin; 285 for ( i=0; i<m; i++ ) { 286 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 287 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 288 if (idxm[i] >= rstart && idxm[i] < rend) { 289 row = idxm[i] - rstart; 290 for ( j=0; j<n; j++ ) { 291 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 292 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 293 if (idxn[j] >= cstart && idxn[j] < cend){ 294 col = 
idxn[j] - cstart; 295 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 296 } else { 297 if (!aij->colmap) { 298 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 299 } 300 #if defined (PETSC_USE_CTABLE) 301 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 302 col --; 303 #else 304 col = aij->colmap[idxn[j]] - 1; 305 #endif 306 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 307 else { 308 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 309 } 310 } 311 } 312 } else { 313 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 314 } 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNC__ 320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 322 { 323 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 324 int ierr,nstash,reallocs; 325 InsertMode addv; 326 327 PetscFunctionBegin; 328 if (aij->donotstash) { 329 PetscFunctionReturn(0); 330 } 331 332 /* make sure all processors are either in INSERTMODE or ADDMODE */ 333 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 334 if (addv == (ADD_VALUES|INSERT_VALUES)) { 335 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 336 } 337 mat->insertmode = addv; /* in case this processor had no cache */ 338 339 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 340 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 341 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 342 PetscFunctionReturn(0); 343 } 344 345 346 #undef __FUNC__ 347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 349 { 350 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 351 int i,j,rstart,ncols,n,ierr,flg; 352 int *row,*col,other_disassembled; 353 Scalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for ( i=0; i<n; ) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. 
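      (The MPI_Allreduce below forms the product of the was_assembled flags over
      all processes; the product is zero exactly when some process has
      disassembled, or was never assembled, and in that case a process that is
      still assembled disassembles as well so that its off-diagonal block can
      again accept entries in new global columns.)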
*/ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 } 390 391 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 392 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 395 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 396 397 if (aij->rowvalues) { 398 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 399 aij->rowvalues = 0; 400 } 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNC__ 405 #define __FUNC__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNC__ 418 #define __FUNC__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 422 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 423 int *procs,*nprocs,j,found,idx,nsends,*work,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 432 PetscFunctionBegin; 433 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 434 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 435 436 /* first count number of contributors to each processor */ 437 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 438 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 439 procs = nprocs + size; 440 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 441 for ( i=0; i<N; i++ ) { 442 idx = rows[i]; 443 found = 0; 444 for ( j=0; j<size; j++ ) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 450 } 451 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 452 453 /* inform other processors of number of messages and max length*/ 454 work = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work); 455 ierr = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 456 nrecvs = work[size+rank]; 457 nmax = work[rank]; 458 ierr = PetscFree(work);CHKERRQ(ierr); 459 460 /* post receives: */ 461 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 462 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 463 for ( i=0; i<nrecvs; i++ ) { 464 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 465 } 466 467 /* do sends: 468 1) starts[i] gives the starting index in svalues for stuff going to 469 the ith processor 470 */ 471 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 472 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 473 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 474 
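  /*
     The loop below packs the rows to be zeroed into svalues[], bucketed by
     owning process: starts[] begins as the prefix sum of nprocs[] (the number
     of rows destined for each process) and is advanced as each row is copied,
     which is why it is recomputed again before the MPI_Isend() calls are posted.
  */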
starts[0] = 0; 475 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 476 for ( i=0; i<N; i++ ) { 477 svalues[starts[owner[i]]++] = rows[i]; 478 } 479 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 480 481 starts[0] = 0; 482 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 483 count = 0; 484 for ( i=0; i<size; i++ ) { 485 if (procs[i]) { 486 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 487 } 488 } 489 ierr = PetscFree(starts);CHKERRQ(ierr); 490 491 base = owners[rank]; 492 493 /* wait on receives */ 494 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 495 source = lens + nrecvs; 496 count = nrecvs; slen = 0; 497 while (count) { 498 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 499 /* unpack receives into our local space */ 500 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 501 source[imdex] = recv_status.MPI_SOURCE; 502 lens[imdex] = n; 503 slen += n; 504 count--; 505 } 506 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 507 508 /* move the data into the send scatter */ 509 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 510 count = 0; 511 for ( i=0; i<nrecvs; i++ ) { 512 values = rvalues + i*nmax; 513 for ( j=0; j<lens[i]; j++ ) { 514 lrows[count++] = values[j] - base; 515 } 516 } 517 ierr = PetscFree(rvalues);CHKERRQ(ierr); 518 ierr = PetscFree(lens);CHKERRQ(ierr); 519 ierr = PetscFree(owner);CHKERRQ(ierr); 520 ierr = PetscFree(nprocs);CHKERRQ(ierr); 521 522 /* actually zap the local rows */ 523 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 524 PLogObjectParent(A,istmp); 525 526 /* 527 Zero the required rows. If the "diagonal block" of the matrix 528 is square and the user wishes to set the diagonal we use seperate 529 code so that MatSetValues() is not called for each diagonal allocating 530 new memory, thus calling lots of mallocs and slowing things down. 
531 532 Contributed by: Mathew Knepley 533 */ 534 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 535 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 536 if (diag && (l->A->M == l->A->N)) { 537 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 538 } else if (diag) { 539 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 540 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 541 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 542 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 543 } 544 for ( i = 0; i < slen; i++ ) { 545 row = lrows[i] + rstart; 546 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 547 } 548 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 } else { 551 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 552 } 553 ierr = ISDestroy(istmp);CHKERRQ(ierr); 554 ierr = PetscFree(lrows);CHKERRQ(ierr); 555 556 /* wait on sends */ 557 if (nsends) { 558 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 559 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 560 ierr = PetscFree(send_status);CHKERRQ(ierr); 561 } 562 ierr = PetscFree(send_waits);CHKERRQ(ierr); 563 ierr = PetscFree(svalues);CHKERRQ(ierr); 564 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNC__ 569 #define __FUNC__ "MatMult_MPIAIJ" 570 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 571 { 572 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 573 int ierr,nt; 574 575 PetscFunctionBegin; 576 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 577 if (nt != a->n) { 578 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 579 } 580 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 581 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 582 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNC__ 588 #define __FUNC__ "MatMultAdd_MPIAIJ" 589 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 590 { 591 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 592 int ierr; 593 594 PetscFunctionBegin; 595 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 596 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 597 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 599 PetscFunctionReturn(0); 600 } 601 602 #undef __FUNC__ 603 #define __FUNC__ "MatMultTrans_MPIAIJ" 604 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 605 { 606 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 607 int ierr; 608 609 PetscFunctionBegin; 610 /* do nondiagonal part */ 611 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 612 /* send it on its way */ 613 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 614 /* do local part */ 615 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 616 /* receive remote parts: note this assumes the values are not actually */ 617 /* inserted in yy until the next line, which is true for my implementation*/ 618 /* but is not perhaps always true. 
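      (The local product A_loc^T x is written into yy while the reverse scatter
      with ADD_VALUES is still in flight; correctness relies on the scattered
      off-process contributions being added to yy only inside VecScatterEnd().)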
*/ 619 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNC__ 624 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 625 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 626 { 627 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 628 int ierr; 629 630 PetscFunctionBegin; 631 /* do nondiagonal part */ 632 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 633 /* send it on its way */ 634 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 635 /* do local part */ 636 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 637 /* receive remote parts: note this assumes the values are not actually */ 638 /* inserted in yy until the next line, which is true for my implementation*/ 639 /* but is not perhaps always true. */ 640 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } 643 644 /* 645 This only works correctly for square matrices where the subblock A->A is the 646 diagonal block 647 */ 648 #undef __FUNC__ 649 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 650 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 651 { 652 int ierr; 653 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 654 655 PetscFunctionBegin; 656 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 657 if (a->rstart != a->cstart || a->rend != a->cend) { 658 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 659 } 660 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 #undef __FUNC__ 665 #define __FUNC__ "MatScale_MPIAIJ" 666 int MatScale_MPIAIJ(Scalar *aa,Mat A) 667 { 668 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 669 int ierr; 670 671 PetscFunctionBegin; 672 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 673 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 674 PetscFunctionReturn(0); 675 } 676 677 #undef __FUNC__ 678 #define __FUNC__ "MatDestroy_MPIAIJ" 679 int MatDestroy_MPIAIJ(Mat mat) 680 { 681 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 682 int ierr; 683 684 PetscFunctionBegin; 685 686 if (mat->mapping) { 687 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 688 } 689 if (mat->bmapping) { 690 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 691 } 692 if (mat->rmap) { 693 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 694 } 695 if (mat->cmap) { 696 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 697 } 698 #if defined(PETSC_USE_LOG) 699 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 700 #endif 701 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 702 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 703 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 704 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 705 #if defined (PETSC_USE_CTABLE) 706 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 707 #else 708 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 709 #endif 710 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 711 if (aij->lvec) VecDestroy(aij->lvec); 712 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 713 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 714 ierr = PetscFree(aij);CHKERRQ(ierr); 715 PLogObjectDestroy(mat); 716 PetscHeaderDestroy(mat); 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNC__ 721 #define __FUNC__ "MatView_MPIAIJ_Binary" 722 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 723 { 724 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 725 
int ierr; 726 727 PetscFunctionBegin; 728 if (aij->size == 1) { 729 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 730 } 731 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 732 PetscFunctionReturn(0); 733 } 734 735 #undef __FUNC__ 736 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 737 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 738 { 739 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 740 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 741 int ierr, format,shift = C->indexshift,rank = aij->rank,size = aij->size; 742 PetscTruth isdraw,isascii; 743 744 PetscFunctionBegin; 745 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 746 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 747 if (isascii) { 748 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 749 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 750 MatInfo info; 751 int flg; 752 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 753 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 754 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 755 if (flg) { 756 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 757 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 758 } else { 759 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 760 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 761 } 762 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 763 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 764 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 765 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 766 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 767 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 770 PetscFunctionReturn(0); 771 } 772 } else if (isdraw) { 773 Draw draw; 774 PetscTruth isnull; 775 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 776 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 777 } 778 779 if (size == 1) { 780 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 781 } else { 782 /* assemble the entire matrix onto first processor. 
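         A temporary MPIAIJ matrix A is created below that places all M rows on
         process 0; every process inserts its local rows into it (translating the
         off-diagonal block's compressed column indices back to global numbering
         through garray), and after assembly process 0 views the resulting
         sequential block through the singleton viewer.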
*/ 783 Mat A; 784 Mat_SeqAIJ *Aloc; 785 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 786 Scalar *a; 787 788 if (!rank) { 789 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 790 } else { 791 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 792 } 793 PLogObjectParent(mat,A); 794 795 /* copy over the A part */ 796 Aloc = (Mat_SeqAIJ*) aij->A->data; 797 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 798 row = aij->rstart; 799 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 800 for ( i=0; i<m; i++ ) { 801 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 802 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 803 } 804 aj = Aloc->j; 805 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 806 807 /* copy over the B part */ 808 Aloc = (Mat_SeqAIJ*) aij->B->data; 809 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 810 row = aij->rstart; 811 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols); 812 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 813 for ( i=0; i<m; i++ ) { 814 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 815 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 816 } 817 ierr = PetscFree(ct);CHKERRQ(ierr); 818 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 819 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 820 /* 821 Everyone has to call to draw the matrix since the graphics waits are 822 synchronized across all processors that share the Draw object 823 */ 824 if (!rank) { 825 Viewer sviewer; 826 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 827 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 828 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 829 } 830 ierr = MatDestroy(A);CHKERRQ(ierr); 831 } 832 PetscFunctionReturn(0); 833 } 834 835 #undef __FUNC__ 836 #define __FUNC__ "MatView_MPIAIJ" 837 int MatView_MPIAIJ(Mat mat,Viewer viewer) 838 { 839 int ierr; 840 PetscTruth isascii,isdraw,issocket,isbinary; 841 842 PetscFunctionBegin; 843 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 844 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 845 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 846 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 847 if (isascii || isdraw || isbinary || issocket) { 848 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 849 } else { 850 SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 851 } 852 PetscFunctionReturn(0); 853 } 854 855 /* 856 This has to provide several versions. 857 858 2) a) use only local smoothing updating outer values only once. 859 b) local smoothing updating outer values each inner iteration 860 3) color updating out values betwen colors. 
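      Currently only the purely local sweeps are implemented below
      (SOR_LOCAL_FORWARD_SWEEP, SOR_LOCAL_BACKWARD_SWEEP and
      SOR_LOCAL_SYMMETRIC_SWEEP); a true parallel SOR is refused. Each sweep
      applies the standard SOR update
          x_i <- (1-omega)*x_i + omega*(b_i - sum_{j!=i} a_ij*x_j)/(fshift + a_ii)
      with the off-processor values of x taken from the ghost vector lvec.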
861 */ 862 #undef __FUNC__ 863 #define __FUNC__ "MatRelax_MPIAIJ" 864 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 865 double fshift,int its,Vec xx) 866 { 867 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 868 Mat AA = mat->A, BB = mat->B; 869 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 870 Scalar *b,*x,*xs,*ls,d,*v,sum; 871 int ierr,*idx, *diag; 872 int n = mat->n, m = mat->m, i,shift = A->indexshift; 873 874 PetscFunctionBegin; 875 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 876 diag = A->diag; 877 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 878 if (flag & SOR_ZERO_INITIAL_GUESS) { 879 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 883 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 884 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 885 if (xx != bb) { 886 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 887 } else { 888 b = x; 889 } 890 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 891 xs = x + shift; /* shift by one for index start of 1 */ 892 ls = ls + shift; 893 while (its--) { 894 /* go down through the rows */ 895 for ( i=0; i<m; i++ ) { 896 n = A->i[i+1] - A->i[i]; 897 PLogFlops(4*n+3); 898 idx = A->j + A->i[i] + shift; 899 v = A->a + A->i[i] + shift; 900 sum = b[i]; 901 SPARSEDENSEMDOT(sum,xs,v,idx,n); 902 d = fshift + A->a[diag[i]+shift]; 903 n = B->i[i+1] - B->i[i]; 904 idx = B->j + B->i[i] + shift; 905 v = B->a + B->i[i] + shift; 906 SPARSEDENSEMDOT(sum,ls,v,idx,n); 907 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 908 } 909 /* come up through the rows */ 910 for ( i=m-1; i>-1; i-- ) { 911 n = A->i[i+1] - A->i[i]; 912 PLogFlops(4*n+3) 913 idx = A->j + A->i[i] + shift; 914 v = A->a + A->i[i] + shift; 915 sum = b[i]; 916 SPARSEDENSEMDOT(sum,xs,v,idx,n); 917 d = fshift + A->a[diag[i]+shift]; 918 n = B->i[i+1] - B->i[i]; 919 idx = B->j + B->i[i] + shift; 920 v = B->a + B->i[i] + shift; 921 SPARSEDENSEMDOT(sum,ls,v,idx,n); 922 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 923 } 924 } 925 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 926 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 927 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 928 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 929 if (flag & SOR_ZERO_INITIAL_GUESS) { 930 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } 933 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 934 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 935 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 936 if (xx != bb) { 937 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 938 } else { 939 b = x; 940 } 941 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 942 xs = x + shift; /* shift by one for index start of 1 */ 943 ls = ls + shift; 944 while (its--) { 945 for ( i=0; i<m; i++ ) { 946 n = A->i[i+1] - A->i[i]; 947 PLogFlops(4*n+3); 948 idx = A->j + A->i[i] + shift; 949 v = A->a + A->i[i] + shift; 950 sum = b[i]; 951 SPARSEDENSEMDOT(sum,xs,v,idx,n); 952 d = fshift + A->a[diag[i]+shift]; 953 n = B->i[i+1] - B->i[i]; 954 idx = B->j + B->i[i] + shift; 955 v = B->a + B->i[i] + shift; 956 SPARSEDENSEMDOT(sum,ls,v,idx,n); 957 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 958 } 959 } 960 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 961 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 962 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 963 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 964 if (flag & SOR_ZERO_INITIAL_GUESS) { 965 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 969 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 970 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 971 if (xx != bb) { 972 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 973 } else { 974 b = x; 975 } 976 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 977 xs = x + shift; /* shift by one for index start of 1 */ 978 ls = ls + shift; 979 while (its--) { 980 for ( i=m-1; i>-1; i-- ) { 981 n = A->i[i+1] - A->i[i]; 982 PLogFlops(4*n+3); 983 idx = A->j + A->i[i] + shift; 984 v = A->a + A->i[i] + shift; 985 sum = b[i]; 986 SPARSEDENSEMDOT(sum,xs,v,idx,n); 987 d = fshift + A->a[diag[i]+shift]; 988 n = B->i[i+1] - B->i[i]; 989 idx = B->j + B->i[i] + shift; 990 v = B->a + B->i[i] + shift; 991 SPARSEDENSEMDOT(sum,ls,v,idx,n); 992 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 993 } 994 } 995 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 996 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 997 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 998 } else { 999 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1000 } 1001 PetscFunctionReturn(0); 1002 } 1003 1004 #undef __FUNC__ 1005 #define __FUNC__ "MatGetInfo_MPIAIJ" 1006 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1007 { 1008 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1009 Mat A = mat->A, B = mat->B; 1010 int ierr; 1011 double isend[5], irecv[5]; 1012 1013 PetscFunctionBegin; 1014 info->block_size = 1.0; 1015 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1016 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1017 isend[3] = info->memory; isend[4] = info->mallocs; 1018 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1019 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1020 isend[3] += info->memory; isend[4] += info->mallocs; 1021 if (flag == MAT_LOCAL) { 1022 info->nz_used = isend[0]; 1023 info->nz_allocated = isend[1]; 1024 info->nz_unneeded = isend[2]; 1025 info->memory = isend[3]; 1026 info->mallocs = isend[4]; 1027 } else if (flag == MAT_GLOBAL_MAX) { 1028 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1029 info->nz_used = irecv[0]; 1030 info->nz_allocated = irecv[1]; 1031 info->nz_unneeded = irecv[2]; 1032 info->memory = irecv[3]; 1033 info->mallocs = irecv[4]; 1034 } else if (flag == MAT_GLOBAL_SUM) { 1035 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1036 info->nz_used = irecv[0]; 1037 info->nz_allocated = irecv[1]; 1038 info->nz_unneeded = irecv[2]; 1039 info->memory = irecv[3]; 1040 info->mallocs = irecv[4]; 1041 } 1042 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1043 info->fill_ratio_needed = 0; 1044 info->factor_mallocs = 0; 1045 info->rows_global = (double)mat->M; 1046 info->columns_global = (double)mat->N; 1047 info->rows_local = (double)mat->m; 1048 info->columns_local = (double)mat->N; 1049 1050 PetscFunctionReturn(0); 1051 } 
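/*
   Illustrative usage sketch: the local numbers gathered above can be summed
   over all processes by calling MatGetInfo() with MAT_GLOBAL_SUM, e.g.

      MatInfo info;
      ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(comm,"nz used %d, mallocs %d\n",
                         (int)info.nz_used,(int)info.mallocs);CHKERRQ(ierr);

   Here mat and comm stand for the user's assembled MPIAIJ matrix and its
   communicator; they are placeholders, not names used in this file.
*/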
1052 1053 #undef __FUNC__ 1054 #define __FUNC__ "MatSetOption_MPIAIJ" 1055 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1056 { 1057 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1058 int ierr; 1059 1060 PetscFunctionBegin; 1061 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1062 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1063 op == MAT_COLUMNS_UNSORTED || 1064 op == MAT_COLUMNS_SORTED || 1065 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1066 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1067 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1068 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1069 } else if (op == MAT_ROW_ORIENTED) { 1070 a->roworiented = 1; 1071 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1072 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1073 } else if (op == MAT_ROWS_SORTED || 1074 op == MAT_ROWS_UNSORTED || 1075 op == MAT_SYMMETRIC || 1076 op == MAT_STRUCTURALLY_SYMMETRIC || 1077 op == MAT_YES_NEW_DIAGONALS) 1078 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1079 else if (op == MAT_COLUMN_ORIENTED) { 1080 a->roworiented = 0; 1081 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1082 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1083 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1084 a->donotstash = 1; 1085 } else if (op == MAT_NO_NEW_DIAGONALS){ 1086 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1087 } else { 1088 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1089 } 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNC__ 1094 #define __FUNC__ "MatGetSize_MPIAIJ" 1095 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1096 { 1097 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1098 1099 PetscFunctionBegin; 1100 if (m) *m = mat->M; 1101 if (n) *n = mat->N; 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNC__ 1106 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1107 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1108 { 1109 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1110 1111 PetscFunctionBegin; 1112 if (m) *m = mat->m; 1113 if (n) *n = mat->n; 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNC__ 1118 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1119 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1120 { 1121 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1122 1123 PetscFunctionBegin; 1124 *m = mat->rstart; *n = mat->rend; 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNC__ 1129 #define __FUNC__ "MatGetRow_MPIAIJ" 1130 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1131 { 1132 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1133 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1134 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1135 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1136 int *cmap, *idx_p; 1137 1138 PetscFunctionBegin; 1139 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1140 mat->getrowactive = PETSC_TRUE; 1141 1142 if (!mat->rowvalues && (idx || v)) { 1143 /* 1144 allocate enough space to hold information from the longest row. 
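         The scan below finds the largest combined row length over the diagonal
         (A) and off-diagonal (B) parts; one buffer of that size then holds the
         merged values, with the matching global column indices stored
         immediately after them.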
1145 */ 1146 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1147 int max = 1,m = mat->m,tmp; 1148 for ( i=0; i<m; i++ ) { 1149 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1150 if (max < tmp) { max = tmp; } 1151 } 1152 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1153 mat->rowindices = (int *) (mat->rowvalues + max); 1154 } 1155 1156 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1157 lrow = row - rstart; 1158 1159 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1160 if (!v) {pvA = 0; pvB = 0;} 1161 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1162 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1163 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1164 nztot = nzA + nzB; 1165 1166 cmap = mat->garray; 1167 if (v || idx) { 1168 if (nztot) { 1169 /* Sort by increasing column numbers, assuming A and B already sorted */ 1170 int imark = -1; 1171 if (v) { 1172 *v = v_p = mat->rowvalues; 1173 for ( i=0; i<nzB; i++ ) { 1174 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1175 else break; 1176 } 1177 imark = i; 1178 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1179 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1180 } 1181 if (idx) { 1182 *idx = idx_p = mat->rowindices; 1183 if (imark > -1) { 1184 for ( i=0; i<imark; i++ ) { 1185 idx_p[i] = cmap[cworkB[i]]; 1186 } 1187 } else { 1188 for ( i=0; i<nzB; i++ ) { 1189 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1190 else break; 1191 } 1192 imark = i; 1193 } 1194 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1195 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1196 } 1197 } else { 1198 if (idx) *idx = 0; 1199 if (v) *v = 0; 1200 } 1201 } 1202 *nz = nztot; 1203 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1204 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNC__ 1209 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1210 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1211 { 1212 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1213 1214 PetscFunctionBegin; 1215 if (aij->getrowactive == PETSC_FALSE) { 1216 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1217 } 1218 aij->getrowactive = PETSC_FALSE; 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatNorm_MPIAIJ" 1224 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1225 { 1226 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1227 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1228 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1229 double sum = 0.0; 1230 Scalar *v; 1231 1232 PetscFunctionBegin; 1233 if (aij->size == 1) { 1234 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1235 } else { 1236 if (type == NORM_FROBENIUS) { 1237 v = amat->a; 1238 for (i=0; i<amat->nz; i++ ) { 1239 #if defined(PETSC_USE_COMPLEX) 1240 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1241 #else 1242 sum += (*v)*(*v); v++; 1243 #endif 1244 } 1245 v = bmat->a; 1246 for (i=0; i<bmat->nz; i++ ) { 1247 #if defined(PETSC_USE_COMPLEX) 1248 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1249 #else 1250 sum += (*v)*(*v); v++; 1251 #endif 1252 } 1253 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1254 *norm = sqrt(*norm); 1255 } else if (type == NORM_1) { /* max 
column norm */ 1256 double *tmp, *tmp2; 1257 int *jj, *garray = aij->garray; 1258 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp); 1259 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2); 1260 ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr); 1261 *norm = 0.0; 1262 v = amat->a; jj = amat->j; 1263 for ( j=0; j<amat->nz; j++ ) { 1264 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1265 } 1266 v = bmat->a; jj = bmat->j; 1267 for ( j=0; j<bmat->nz; j++ ) { 1268 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1269 } 1270 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1271 for ( j=0; j<aij->N; j++ ) { 1272 if (tmp2[j] > *norm) *norm = tmp2[j]; 1273 } 1274 ierr = PetscFree(tmp);CHKERRQ(ierr); 1275 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1276 } else if (type == NORM_INFINITY) { /* max row norm */ 1277 double ntemp = 0.0; 1278 for ( j=0; j<amat->m; j++ ) { 1279 v = amat->a + amat->i[j] + shift; 1280 sum = 0.0; 1281 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1282 sum += PetscAbsScalar(*v); v++; 1283 } 1284 v = bmat->a + bmat->i[j] + shift; 1285 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1286 sum += PetscAbsScalar(*v); v++; 1287 } 1288 if (sum > ntemp) ntemp = sum; 1289 } 1290 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1291 } else { 1292 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1293 } 1294 } 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNC__ 1299 #define __FUNC__ "MatTranspose_MPIAIJ" 1300 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1301 { 1302 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1303 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1304 int ierr,shift = Aloc->indexshift; 1305 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1306 Mat B; 1307 Scalar *array; 1308 1309 PetscFunctionBegin; 1310 if (matout == PETSC_NULL && M != N) { 1311 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1312 } 1313 1314 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1315 1316 /* copy over the A part */ 1317 Aloc = (Mat_SeqAIJ*) a->A->data; 1318 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1319 row = a->rstart; 1320 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1321 for ( i=0; i<m; i++ ) { 1322 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1323 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1324 } 1325 aj = Aloc->j; 1326 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1327 1328 /* copy over the B part */ 1329 Aloc = (Mat_SeqAIJ*) a->B->data; 1330 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1331 row = a->rstart; 1332 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols); 1333 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1334 for ( i=0; i<m; i++ ) { 1335 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1336 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1337 } 1338 ierr = PetscFree(ct);CHKERRQ(ierr); 1339 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1340 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1341 if (matout != PETSC_NULL) { 1342 *matout = B; 1343 } else { 1344 PetscOps *Abops; 1345 struct _MatOps *Aops; 1346 1347 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1348 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1349 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1350 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1351 #if defined (PETSC_USE_CTABLE) 1352 if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);} 1353 #else 1354 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1355 #endif 1356 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1357 if (a->lvec) VecDestroy(a->lvec); 1358 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1359 ierr = PetscFree(a);CHKERRQ(ierr); 1360 1361 /* 1362 This is horrible, horrible code. We need to keep the 1363 A pointers for the bops and ops but copy everything 1364 else from C. 1365 */ 1366 Abops = A->bops; 1367 Aops = A->ops; 1368 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1369 A->bops = Abops; 1370 A->ops = Aops; 1371 PetscHeaderDestroy(B); 1372 } 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #undef __FUNC__ 1377 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1378 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1379 { 1380 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1381 Mat a = aij->A, b = aij->B; 1382 int ierr,s1,s2,s3; 1383 1384 PetscFunctionBegin; 1385 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1386 if (rr) { 1387 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1388 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1389 /* Overlap communication with computation. */ 1390 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1391 } 1392 if (ll) { 1393 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1394 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1395 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1396 } 1397 /* scale the diagonal block */ 1398 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1399 1400 if (rr) { 1401 /* Do a scatter end and then right scale the off-diagonal block */ 1402 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1403 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1404 } 1405 1406 PetscFunctionReturn(0); 1407 } 1408 1409 1410 #undef __FUNC__ 1411 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1412 int MatPrintHelp_MPIAIJ(Mat A) 1413 { 1414 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1415 int ierr; 1416 1417 PetscFunctionBegin; 1418 if (!a->rank) { 1419 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1420 } 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNC__ 1425 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1426 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1427 { 1428 PetscFunctionBegin; 1429 *bs = 1; 1430 PetscFunctionReturn(0); 1431 } 1432 #undef __FUNC__ 1433 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1434 int MatSetUnfactored_MPIAIJ(Mat A) 1435 { 1436 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1437 int ierr; 1438 1439 PetscFunctionBegin; 1440 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNC__ 1445 #define __FUNC__ "MatEqual_MPIAIJ" 1446 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1447 { 1448 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1449 Mat a, b, c, d; 1450 PetscTruth flg; 1451 int ierr; 1452 1453 PetscFunctionBegin; 1454 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1455 a = matA->A; b = matA->B; 1456 c = matB->A; d = matB->B; 1457 1458 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1459 if (flg == PETSC_TRUE) { 1460 ierr 
= MatEqual(b, d, &flg);CHKERRQ(ierr); 1461 } 1462 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNC__ 1467 #define __FUNC__ "MatCopy_MPIAIJ" 1468 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1469 { 1470 int ierr; 1471 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1472 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1473 1474 PetscFunctionBegin; 1475 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1476 /* because of the column compression in the off-processor part of the matrix a->B, 1477 the number of columns in a->B and b->B may be different, hence we cannot call 1478 the MatCopy() directly on the two parts. If need be, we can provide a more 1479 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1480 then copying the submatrices */ 1481 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1482 } else { 1483 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1484 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1485 } 1486 PetscFunctionReturn(0); 1487 } 1488 1489 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1490 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1491 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1492 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1493 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1494 1495 /* -------------------------------------------------------------------*/ 1496 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1497 MatGetRow_MPIAIJ, 1498 MatRestoreRow_MPIAIJ, 1499 MatMult_MPIAIJ, 1500 MatMultAdd_MPIAIJ, 1501 MatMultTrans_MPIAIJ, 1502 MatMultTransAdd_MPIAIJ, 1503 0, 1504 0, 1505 0, 1506 0, 1507 0, 1508 0, 1509 MatRelax_MPIAIJ, 1510 MatTranspose_MPIAIJ, 1511 MatGetInfo_MPIAIJ, 1512 MatEqual_MPIAIJ, 1513 MatGetDiagonal_MPIAIJ, 1514 MatDiagonalScale_MPIAIJ, 1515 MatNorm_MPIAIJ, 1516 MatAssemblyBegin_MPIAIJ, 1517 MatAssemblyEnd_MPIAIJ, 1518 0, 1519 MatSetOption_MPIAIJ, 1520 MatZeroEntries_MPIAIJ, 1521 MatZeroRows_MPIAIJ, 1522 0, 1523 0, 1524 0, 1525 0, 1526 MatGetSize_MPIAIJ, 1527 MatGetLocalSize_MPIAIJ, 1528 MatGetOwnershipRange_MPIAIJ, 1529 0, 1530 0, 1531 0, 1532 0, 1533 MatDuplicate_MPIAIJ, 1534 0, 1535 0, 1536 0, 1537 0, 1538 0, 1539 MatGetSubMatrices_MPIAIJ, 1540 MatIncreaseOverlap_MPIAIJ, 1541 MatGetValues_MPIAIJ, 1542 MatCopy_MPIAIJ, 1543 MatPrintHelp_MPIAIJ, 1544 MatScale_MPIAIJ, 1545 0, 1546 0, 1547 0, 1548 MatGetBlockSize_MPIAIJ, 1549 0, 1550 0, 1551 0, 1552 0, 1553 MatFDColoringCreate_MPIAIJ, 1554 0, 1555 MatSetUnfactored_MPIAIJ, 1556 0, 1557 0, 1558 MatGetSubMatrix_MPIAIJ, 1559 0, 1560 0, 1561 MatGetMaps_Petsc}; 1562 1563 /* ----------------------------------------------------------------------------------------*/ 1564 1565 EXTERN_C_BEGIN 1566 #undef __FUNC__ 1567 #define __FUNC__ "MatStoreValues_MPIAIJ" 1568 int MatStoreValues_MPIAIJ(Mat mat) 1569 { 1570 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1571 int ierr; 1572 1573 PetscFunctionBegin; 1574 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1575 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 EXTERN_C_END 1579 1580 EXTERN_C_BEGIN 1581 #undef __FUNC__ 1582 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1583 int MatRetrieveValues_MPIAIJ(Mat mat) 1584 { 1585 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1586 int ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1590 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1591 
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__  
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.  For square matrices
           n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e., 'm'. 
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure.  The size of this array is equal to the number 
           of local rows, i.e., 'm'. 

   Output Parameter:
.  A - the matrix 

   Notes:
   The m, n, M, and N parameters specify the size of the matrix and its
   partitioning across processors, while d_nz, d_nnz, o_nz, and o_nnz specify
   the approximate storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
   processor then it must be used on all processors that share the object for 
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users' manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2, etc., where m0, m1, m2,... are the input parameter 'm'.
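   As a minimal calling sketch (the communicator, local size, and preallocation
   numbers here are only placeholders):

.vb
      Mat  A;
      int  m = 3;      /* rows owned by this process */
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                             5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
      ierr = MatDestroy(A);CHKERRQ(ierr);
.ve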

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
        (defaults to using SeqAIJ format on one processor)


   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.
   This translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr,i,size,flag1 = 0,flag2 = 0;

  PetscFunctionBegin;

  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix-vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                    "MatStoreValues_MPIAIJ",
                                    (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                    "MatRetrieveValues_MPIAIJ",
                                    (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIAIJ",
                                    (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}
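
/*
   Illustrative sketch (not part of the library source): assembling the 8x8 example
   from the MatCreateMPIAIJ() manual page above on 3 processes. The d_nnz/o_nnz
   values are the ones worked out in that example; mlocal/nlocal (3,3,2 on
   proc0,proc1,proc2) and the ncols/cols/vals arrays describing each local row
   are hypothetical and would be supplied by the application. Since the nnz
   arrays are given, 0 is passed for d_nz and o_nz (they are ignored).

     Mat A;
     int ierr,i,rstart,rend;
     int d_nnz[3],o_nnz[3];    (filled per rank, e.g. {2,2,2} and {2,2,2} on proc0)

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,mlocal,nlocal,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,ncols[i],cols[i],vals[i],INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/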
#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr,len=0,flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i,nz,ierr,j,rstart,rend,fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
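
/*
   Illustrative sketch (not part of the library source): loading an MPIAIJ matrix
   from a PETSc binary file. The file name "matrix.dat" is hypothetical, and the
   binary viewer creation call may differ between PETSc versions; MatLoad() is
   normally reached through the type dispatch rather than by calling
   MatLoad_MPIAIJ() above directly.

     Mat    A;
     Viewer viewer;
     int    ierr;

     ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
*/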

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: each process first builds
  a local SeqAIJ submatrix, and then the final parallel result is formed by
  concatenating those local matrices. Writing it directly would be much like
  MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M,Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of nonzeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
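
/*
   Illustrative sketch (not part of the library source): extracting a parallel
   submatrix and reusing it, assuming the public MatGetSubMatrix() interface
   mirrors the argument list of the routine above. The parent matrix A and the
   index sets isrow/iscol are hypothetical and supplied by the application;
   csize = PETSC_DECIDE lets the routine choose the local column layout.

     Mat sub;
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

   A later call with MAT_REUSE_MATRIX overwrites sub in place; it must have been
   created by a previous MAT_INITIAL_MATRIX call of the same size/layout (the
   routine checks the composed "SubMatrix" object and the local dimensions):

     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/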