1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        /* entry already exists: add or overwrite, no structural change */ \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* nonew==1: silently ignore new nonzeros; nonew==-1: treat as error */ \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: one allocation holds a (scalars), j and i */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots; rows after 'row' shift by CHUNKSIZE */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      /* recompute row pointers against the new arrays */ \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
} \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = 
PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      /* recompute row pointers against the new arrays */ \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

/*
   Inserts a logically dense (m x n) array of values into the parallel matrix.
   Locally owned rows go directly into the A (diagonal) or B (off-diagonal)
   sequential blocks via the macros above; rows owned by other processors are
   stashed for communication at assembly time (unless donotstash is set).
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  /* zeros are only droppable when adding; inserting a zero must still create the entry */
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int
              nonew = a->nonew,shift = a->indexshift;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative rows are silently ignored by convention */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is owned locally */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          /* column in the diagonal block A */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;  /* colmap stores local index + 1; 0 means "not found" */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-process column: revert B to global numbering so it can grow */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned elsewhere: stash the whole row segment for assembly-time sends */
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if
             (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Retrieves values at the given (row,column) index pairs.  Only locally
   owned rows are supported; entries not stored in either the A or B block
   are returned as 0.0.
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate global column to B's local numbering */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not present in B -> value is structurally zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

/*
   Begins assembly: agrees on the insert mode across all processors and
   starts communicating stashed off-processor values.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  /* NOTE(review): this early return skips the collective MPI_Allreduce below;
     presumably donotstash is set consistently on all ranks — verify, otherwise
     ranks that do not set it would hang in the reduction */
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE;
     BOR of the two enum values detects a mix (result has both bits set) */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


/*
   Completes assembly: drains the stash (inserting rows received from other
   processors), assembles the A and B blocks, and sets up the scatter
   machinery for matrix-vector products on final assembly.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive stashed entries until every sender is exhausted */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* PROD of was_assembled flags is 0 iff at least one rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row-extraction workspace is stale after assembly */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/* Zeros all stored values (structure unchanged) in both local blocks. */
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Zeros the (globally numbered) rows listed in 'is', optionally setting the
   diagonal entry of each zeroed row to *diag.  Any processor may list any
   row; row indices are first routed to their owners with point-to-point
   messages, then each owner zeros its local rows.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr =
         PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;  /* second half of the array: 0/1 "do I send to j" flags */
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* linear scan of the ownership ranges to find the owner of row idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];  /* number of messages I will receive */
  nmax   = work[rank];       /* upper bound on the length of any one message */
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination (this pass advances starts[]) */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[], consumed by the bucketing pass above */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];  /* first globally numbered row owned by this rank */

  /*  wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert to local row numbering */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: the sequential MatZeroRows can set the diagonal itself */
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: set diagonal entries one at a time via MatSetValues */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
   y = A*x: overlaps the scatter of off-processor x entries with the local
   (diagonal block) product, then adds the off-diagonal contribution.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr =
         VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   z = y + A*x: same communication pattern as MatMult_MPIAIJ, accumulating
   into zz.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   y = A'*x: the off-diagonal transpose product is computed into lvec and
   scattered back (reverse, adding) while the local transpose product runs.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
  */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   z = y + A'*x: transpose multiply-add with the same reverse-scatter
   pattern as MatMultTranspose_MPIAIJ.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  /* with matching row/col partitions the whole diagonal lives in the A block */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scales the entire matrix by the scalar *aa (both local blocks). */
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define \
__FUNCT__ "MatDestroy_MPIAIJ"
/* Frees all storage owned by the parallel matrix: stash, blocks, colmap,
   garray, local work vector, scatter context and the data structure itself. */
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Viewer for ASCII, draw, socket and binary viewers.  Info formats print
   per-processor statistics; otherwise the matrix is gathered onto the first
   processor and viewed there as a sequential matrix.
*/
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr =
             PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* uniprocessor: view the (entire) diagonal block directly */
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor.
    */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    /* rank 0 gets all M rows of the gathered copy; everyone else gets none */
    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part: temporarily convert its column indices to
       global numbering in place, then restore them afterwards */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part: map local columns to global via garray */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct = cols;  /* keep original pointer; 'cols' is advanced in the loop below */
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr =
           MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Dispatching viewer: ASCII, draw, binary and socket viewers are all handled
   by MatView_MPIAIJ_ASCIIorDraworSocket; anything else is an error.
*/
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}



/*
   Local (block Jacobi style) SOR/relaxation: each outer iteration refreshes
   the ghost values of xx, forms bb1 = bb - B*x and runs the sequential
   relaxation on the diagonal block.  True parallel SOR is not supported.
*/
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr =
(*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx); 862 CHKERRQ(ierr); 863 } 864 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 865 if (flag & SOR_ZERO_INITIAL_GUESS) { 866 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 867 its--; 868 } 869 while (its--) { 870 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 871 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 872 873 /* update rhs: bb1 = bb - B*x */ 874 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 875 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 876 877 /* local sweep */ 878 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 879 CHKERRQ(ierr); 880 } 881 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 882 if (flag & SOR_ZERO_INITIAL_GUESS) { 883 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 884 its--; 885 } 886 while (its--) { 887 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 888 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 889 890 /* update rhs: bb1 = bb - B*x */ 891 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 892 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 893 894 /* local sweep */ 895 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 896 CHKERRQ(ierr); 897 } 898 } else { 899 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 900 } 901 902 ierr = VecDestroy(bb1);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 #undef __FUNCT__ 907 #define __FUNCT__ "MatGetInfo_MPIAIJ" 908 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 909 { 910 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 911 Mat A = mat->A,B = mat->B; 912 int ierr; 913 PetscReal isend[5],irecv[5]; 914 
  PetscFunctionBegin;
  info->block_size = 1.0;
  /* accumulate the two local blocks' statistics into isend[] */
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  /* NOTE(review): columns_local is set from the global N rather than
     matin->n -- presumably because a local row spans all N columns;
     confirm this is intended rather than a typo for matin->n */
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Applies a matrix option, forwarding most options
   to both sequential blocks; row/column orientation also updates the
   roworiented flag, and stash-related options only set local flags.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded unchanged to both the A and B blocks */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row with global column
   numbers, merging the diagonal (A) and off-diagonal (B) block entries
   so the result is sorted by increasing column (B entries left of the
   diagonal block come first, then A, then the remaining B entries).
   Only one row may be "gotten" at a time; MatRestoreRow must follow.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation shared by the value and index work arrays */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* null out the work pointers the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1; /* number of B entries whose global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* imark was not computed in the value pass above; compute it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ;
   only clears the active flag (the work buffers are reused across calls).
*/
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum) or
   infinity- (max row sum) norm of the parallel matrix by combining the
   two sequential blocks and reducing across the communicator.
   The 2-norm is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: everything lives in the A block */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* every process accumulates full-length (N) column sums, which are
         then summed globally -- not scalable in memory, but simple */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose by inserting each local row
   as a column of a new matrix B (note the swapped row/column arguments
   to MatSetValues).  In-place transpose (matout == PETSC_NULL) is only
   allowed for square matrices.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* transpose has the local sizes swapped: n x m locally, N x M globally */
  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part: shift local columns to global numbering in
     place, insert row i as column (rstart+i), then undo the shift */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part: expand compressed columns through garray */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols; /* base pointer kept for the free below */
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: B's guts replace A's */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes ll * mat * rr.  The right-scaling
   vector must be scattered to the off-processor columns; that scatter is
   overlapped with the left scaling of the B block and the scaling of the
   diagonal block.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
/* MatPrintHelp_MPIAIJ - Prints runtime options (on process 0 only) by
   delegating to the sequential AIJ help. */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
/* MatGetBlockSize_MPIAIJ - AIJ matrices always have block size 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal
   block (the off-diagonal block is never factored). */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Two MPIAIJ matrices are equal when both local blocks
   are equal on every process (logical AND over the communicator).
   NOTE(review): the Allreduce sends PetscTruth values as MPI_INT --
   assumes the enum is int-sized; confirm on all supported compilers.
*/
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B.  The fast block-by-block path is only
   valid when both matrices have the same nonzero pattern; otherwise the
   generic (value-by-value) MatCopy_Basic is used.
*/
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscTruth flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (str != SAME_NONZERO_PATTERN || !flg) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/* MatSetUpPreallocation_MPIAIJ - Default preallocation when the user
   supplied none: PETSC_DEFAULT per-row estimates for both blocks. */
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
#endif

/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ matrix type; zero entries are
   unimplemented operations.  Order must match struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* MatStoreValues_MPIAIJ - Saves the numerical values of both local blocks
   so they can later be restored with MatRetrieveValues (body below). */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;
  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* MatRetrieveValues_MPIAIJ - Restores the numerical values of both local
   blocks previously saved with MatStoreValues_MPIAIJ. */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Type constructor registered for MATMPIAIJ.  Installs
   the function table, splits ownership of rows and columns, builds the
   row/column ownership tables, and initializes all cached state (stash,
   colmap, garray, scatter context, MatGetRow work space).
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* fill in any PETSC_DECIDE local/global sizes */
  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships (one allocation:
     rowners followed by cowners) */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* prefix-sum the gathered local sizes into ownership offsets */
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same layout
   as matin, copying or ignoring the numerical values according to
   cpvalues (body continues below).
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;
  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  /* copy the ownership/layout information from the original */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners buffer also holds cowners (2*(size+2) ints, see MatCreate) */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n; /* number of (compressed) off-diagonal columns */
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format into a
   new parallel MPIAIJ matrix.  Process 0 reads the file (header, row
   lengths, column indices, values) and ships each process its share;
   row lengths are pre-scanned so the matrix can be preallocated exactly.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 touches the file */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: first (M%size) ranks get one extra */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors (ourlens buffer also
     provides offlens, the per-row off-diagonal counts used below) */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: preallocate with diagonal (ourlens-offlens) and
     off-diagonal (offlens) counts, then restore ourlens to total lengths */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out (values use A->tag so they
       cannot be confused with the earlier column-index messages) */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
    in local and then by concatenating the local matrices the end result.
   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

   isrow  - parallel row index set to extract
   iscol  - parallel column index set to extract
   csize  - number of columns of the result stored on this process
            (or PETSC_DECIDE to have it computed here)
   call   - MAT_INITIAL_MATRIX to build a new matrix, or MAT_REUSE_MATRIX to
            refill a matrix obtained from an earlier MAT_INITIAL_MATRIX call
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix that was attached to the output matrix
       by a previous MAT_INITIAL_MATRIX call (see PetscObjectCompose() below) */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* first copy: each process extracts its piece as a sequential matrix */
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii = aij->i;
    jj = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* prefix-sum of the local column counts gives this process' column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* second copy: insert the rows of the sequential submatrix into the
     parallel matrix, walking the CSR column/value arrays row by row */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* M keeps the only reference to Mreuse; it is destroyed along with M */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix
   in AIJ format (the default parallel PETSc format).  For good matrix assembly
   performance the user should preallocate the matrix storage by setting the
   parameters d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters
   accurately, performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  /* silently ignore matrices that are not of type MPIAIJ */
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  /* defaults used when the caller gives no guidance */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  /* validate the per-matrix and per-row nonzero counts */
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* diagonal block b->A is the m x n square part owned here; off-diagonal
     block b->B is created with the full global column width N */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor than it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on a single process the matrix is simply a SeqAIJ matrix */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
   MatMPIAIJGetSeqAIJ - Gives access to the internal storage of an MPIAIJ matrix:
   Ad is the local "diagonal" SeqAIJ block, Ao the local off-diagonal SeqAIJ
   block, and colmap the garray mapping Ao's local column numbers to global
   column numbers.  NOTE(review): no new reference is taken on the returned
   objects, so callers presumably must not destroy them -- confirm.
*/
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Passes a column coloring down to the two sequential
   blocks of an MPIAIJ matrix.  Handles IS_COLORING_LOCAL (colors supplied for
   locally owned columns; gathered across processes here) and
   IS_COLORING_GHOSTED (colors supplied in local/ghosted numbering); any other
   coloring type is an error.
*/
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion; gather every process' colors so
       that the colors of this process' ghost columns (garray) can be looked up */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr =
MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    int        *colors,i,*larray;
    ISColoring ocoloring;

    /* set coloring for diagonal portion: translate the owned global column
       numbers [cstart,cstart+n) into local numbering, then look up their colors */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: the ghost columns (garray) are
       translated to local numbering the same way */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/*
   MatSetValuesAdic_MPIAIJ - Forwards advalues to both the diagonal (A) and
   off-diagonal (B) sequential blocks.  NOTE(review): advalues presumably holds
   ADIC automatic-differentiation results; its layout is defined by
   MatSetValuesAdic_SeqAIJ() -- confirm there.
*/
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/*
   MatSetValuesAdifor_MPIAIJ - Forwards advalues (with count nl) to both the
   diagonal (A) and off-diagonal (B) sequential blocks.  NOTE(review): nl and
   the layout of advalues are defined by MatSetValuesAdifor_SeqAIJ() -- confirm
   there.
*/
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}