/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
   at a slightly higher hash-table cost; without it it is not scalable
   (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
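
/*
   The two macros above are hand-inlined variants of MatSetValues_SeqAIJ() for
   the diagonal (A) and off-diagonal (B) blocks: a short binary search narrows
   the insertion point in the sorted row, the CHUNKSIZE reallocation grows the
   row storage when it is full, and later entries are shifted up so each row
   stays sorted. Inlining avoids a function call per inserted entry.

   Caller-side sketch (user code, not part of this file; the matrix is assumed
   to have been created with MatCreateMPIAIJ()):

       int         row = 0, col = 0, ierr;
       PetscScalar value = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
*/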

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = a->indexshift;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
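
/*
   Note on the off-diagonal column translation used above: aij->colmap maps a
   global column number to the local column number in the B block plus one
   (zero means "not present"), so after the lookup the code subtracts one and,
   in MatGetValues_MPIAIJ(), also checks garray to reject columns this process
   never allocated. Only locally owned rows can be queried; fetching a remote
   value would require communication, hence the PETSC_ERR_SUP above.
*/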

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
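
/*
   Typical assembly cycle from user code (a sketch, not part of this file):
   entries destined for rows owned by other processes are accumulated in
   mat->stash by MatSetValues_MPIAIJ() above, shipped in
   MatAssemblyBegin_MPIAIJ(), and drained by the
   MatStashScatterGetMesg_Private() loop in MatAssemblyEnd_MPIAIJ().

       ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/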

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus calling lots of mallocs and slowing
     things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
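
/*
   The communication in MatZeroRows_MPIAIJ() above is a standard rendezvous
   pattern: (1) count, per destination rank, how many row indices must be
   forwarded; (2) a single MPI_Allreduce with the PetscMaxSum_Op user-defined
   reduction tells each rank how many messages it will receive
   (work[size+rank]) and the largest message length (work[rank]); (3) the
   receives are posted with MPI_Irecv/MPI_ANY_SOURCE before the MPI_Isend
   calls, and MPI_Waitany drains them in completion order. The same pattern
   appears throughout PETSc's parallel matrix code.
*/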

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this          */
  /* implementation but may not always be true                           */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this          */
  /* implementation but may not always be true                           */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
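
/*
   The multiply routines above overlap communication with computation. With
   x_local the owned part of x and x_ghost = lvec the scattered ghost values,

       y = A_diag * x_local + B_off * x_ghost

   so the scatter that fills lvec is started, the diagonal-block product is
   computed while the messages are in flight, and the off-diagonal product is
   added after VecScatterEnd(). The transpose versions run the same scatter in
   SCATTER_REVERSE with ADD_VALUES to accumulate remote contributions.
*/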

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  int        ierr;
  Vec        bb1;
  PetscReal  mone=-1.0;

  PetscFunctionBegin;

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }

    /* ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); */
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
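
/*
   MatRelax_MPIAIJ() above implements only processor-local SOR: in each outer
   iteration the current iterate is scattered into mat->lvec, the right-hand
   side is corrected as

       bb1 = bb - B_off * x_ghost

   (done by negating lvec and using B's multadd), and a sequential sweep is
   run on the diagonal block with bb1. A true parallel SOR ordering is not
   supported, hence the SETERRQ in the final branch.
*/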

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
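
/*
   Row assembly order in MatGetRow_MPIAIJ() above: because garray is sorted,
   the B-block entries with global column < cstart come first, then all
   A-block entries (local column + cstart), then the remaining B-block
   entries, which yields one row sorted by global column number. The work
   arrays rowvalues/rowindices are sized once for the longest local row and
   reused until assembly frees them.
*/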

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
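
/*
   Norms above, written out: with a_ij the globally numbered entries held
   locally,

       Frobenius:  ||A||_F = sqrt( sum_ij |a_ij|^2 )   local sums + MPI_SUM
       1-norm:     max_j sum_i |a_ij|                  length-N column sums
                                                       combined with MPI_SUM;
                                                       note the O(N) work array
                                                       is not scalable
       inf-norm:   max_i sum_j |a_ij|                  row sums + MPI_MAX

   The matrix 2-norm (largest singular value) would require an eigenvalue
   computation and is not supported here.
*/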

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
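
/*
   MatDiagonalScale_MPIAIJ() above computes diag(ll)*A*diag(rr). The left
   scaling is purely local (ll conforms to the row layout), while the right
   scaling of the off-diagonal block needs the ghost entries of rr, so the
   scatter into lvec is overlapped with the scaling of the diagonal block.
*/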

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscTruth flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (str != SAME_NONZERO_PATTERN || !flg) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
#endif

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
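
/*
   MatStoreValues()/MatRetrieveValues() sketch (user code, not part of this
   file): with new nonzero locations disallowed, a copy of the numerical
   values can be saved and later restored while the nonzero pattern stays
   fixed, e.g. when the same matrix is refilled every timestep:

       ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
       ierr = MatStoreValues(mat);CHKERRQ(ierr);
       ...   modify or zero values, keeping the same pattern
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/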

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr    = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A    = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
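
/*
   Illustration (not part of the original source): the binary layout that
   MatLoad_MPIAIJ() above consumes, reconstructed from the reads it performs:

       int    header[4]       = {MAT_FILE_COOKIE, M, N, nz}
       int    rowlengths[M]     nonzeros in each global row
       int    colindices[nz]    column indices, concatenated row by row
       scalar values[nz]        numerical values in the same order

   The loader only checks that header[3] is nonnegative (negative marks a special
   on-disk format); treating it as the total nonzero count is an assumption here.
   A minimal serial writer sketch under two further stated assumptions: PetscScalar
   is a real double, and the machine is big-endian (PETSc binary files are
   big-endian, and this sketch omits the byte swapping a real writer would need
   on little-endian machines). The helper name WriteAIJBinary is hypothetical.
*/
#if 0
#include <stdio.h>

static int WriteAIJBinary(const char *name,int M,int N,int nz,
                          const int *rowlengths,const int *cols,const double *vals)
{
  FILE *fp = fopen(name,"wb");
  int  header[4];

  if (!fp) return 1;
  header[0] = MAT_FILE_COOKIE;              /* identifies the file as a PETSc matrix */
  header[1] = M; header[2] = N; header[3] = nz;
  fwrite(header,sizeof(int),4,fp);
  fwrite(rowlengths,sizeof(int),M,fp);      /* per-row nonzero counts */
  fwrite(cols,sizeof(int),nz,fp);           /* column indices, row by row */
  fwrite(vals,sizeof(double),nz,fp);        /* values in the same order */
  fclose(fp);
  return 0;
}
#endif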

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ copy
  locally, and then the end result by concatenating the local matrices.
  Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
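
/*
   Illustration (not part of the original source): a typical call sequence for the
   public MatGetSubMatrix() that dispatches to MatGetSubMatrix_MPIAIJ() above.
   Here each process asks for its own contiguous block of rows and the first
   ncols columns; PETSC_DECIDE lets the routine pick the column layout of the
   new matrix. A hedged sketch; the helper name ExtractSubMatrix is hypothetical.
*/
#if 0
static int ExtractSubMatrix(Mat A,int ncols,Mat *sub)
{
  IS  isrow,iscol;
  int ierr,rstart,rend;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  /* rows: the contiguous block this process owns; columns: 0..ncols-1 */
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ncols,0,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  return 0;
}
#endif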

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix
   in AIJ format (the default parallel PETSc format).  For good matrix assembly
   performance the user should preallocate the matrix storage by setting the
   parameters d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters
   accurately, performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on the
   respective processes.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
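
/*
   Illustration (not part of the original source): preallocating the 8x8 example
   matrix from the manual page above on exactly 3 processes, using the per-row
   d_nnz/o_nnz arrays worked out there. A hedged sketch; the helper name
   PreallocateExample is hypothetical. Passing 0 for d_nz/o_nz is fine because
   the nnz arrays take precedence when they are supplied.
*/
#if 0
static int PreallocateExample(MPI_Comm comm,Mat *A)
{
  /* per-process row counts and nonzero counts from the manual page */
  int d_nnz0[3] = {2,2,2}, o_nnz0[3] = {2,2,2};   /* proc0: 3 rows */
  int d_nnz1[3] = {3,3,2}, o_nnz1[3] = {2,1,1};   /* proc1: 3 rows */
  int d_nnz2[2] = {1,1},   o_nnz2[2] = {4,4};     /* proc2: 2 rows */
  int rank,m,*d_nnz,*o_nnz,ierr;

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (rank == 0)      {m = 3; d_nnz = d_nnz0; o_nnz = o_nnz0;}
  else if (rank == 1) {m = 3; d_nnz = d_nnz1; o_nnz = o_nnz1;}
  else                {m = 2; d_nnz = d_nnz2; o_nnz = o_nnz2;}

  ierr = MatCreate(comm,m,m,8,8,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  return 0;
}
#endif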

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm  - MPI communicator
.  m     - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n     - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax. For square matrices n is
           almost always m.
.  M     - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N     - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning
   across processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the
   approximate storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor, then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on the
   respective processes.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
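
/*
   Illustration (not part of the original source): assembling a global NxN
   tridiagonal matrix with MatCreateMPIAIJ(). With contiguous row ownership and
   matching column ownership, each local row has at most 3 entries in the
   DIAGONAL block and at most 1 entry coupling to a neighboring process, so
   d_nz=3, o_nz=1 is a safe preallocation. A hedged sketch; the helper name
   AssembleTridiagonal is hypothetical.
*/
#if 0
static int AssembleTridiagonal(MPI_Comm comm,int N,Mat *A)
{
  int         ierr,i,rstart,rend,ncols,cols[3];
  PetscScalar vals[3];

  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,3,PETSC_NULL,1,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ncols = 0;
    if (i > 0)   {cols[ncols] = i-1; vals[ncols++] = -1.0;}  /* subdiagonal   */
    cols[ncols] = i; vals[ncols++] = 2.0;                    /* main diagonal */
    if (i < N-1) {cols[ncols] = i+1; vals[ncols++] = -1.0;}  /* superdiagonal */
    ierr = MatSetValues(*A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif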

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    int        *colors,i,*larray;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
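
/*
   Illustration (not part of the original source): how the pieces returned by
   MatMPIAIJGetSeqAIJ() above fit together. Ad holds the square DIAGONAL block;
   Ao holds the OFF-DIAGONAL block with compressed local column indices, and
   colmap (the garray built during assembly) translates Ao's local column j back
   to global column colmap[j]. A hedged sketch that prints each off-diagonal
   entry's global column, assuming zero-based indices (no index shift); the
   helper name PrintOffDiagonalColumns is hypothetical.
*/
#if 0
static int PrintOffDiagonalColumns(Mat A)
{
  Mat        Ad,Ao;
  Mat_SeqAIJ *o;
  int        ierr,i,j,*colmap;

  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);  /* Ad unused here */
  o    = (Mat_SeqAIJ*)Ao->data;
  for (i=0; i<Ao->m; i++) {
    for (j=o->i[i]; j<o->i[i+1]; j++) {
      /* o->j[j] is a local (compressed) column; colmap gives the global column */
      ierr = PetscPrintf(PETSC_COMM_SELF,"local row %d global col %d\n",i,colmap[o->j[j]]);CHKERRQ(ierr);
    }
  }
  return 0;
}
#endif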