1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 19 a slightly higher hash table cost; without it, it is not scalable (each processor 20 has an order N integer array), but access is fast. 21 */ 22 #undef __FUNCT__ 23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 24 int CreateColmap_MPIAIJ_Private(Mat mat) 25 { 26 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 27 int n = aij->B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 32 for (i=0; i<n; i++){ 33 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 37 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for (_i=low; _i<high; _i++) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 71 PetscScalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 77 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 78 new_j = (int*)(new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 83 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = 
PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = PETSC_TRUE; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for (ii=N; ii>=_i; ii--) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for (_i=low; _i<high; _i++) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 145 PetscScalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 151 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 152 new_j = (int*)(new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 157 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = PETSC_TRUE; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for (ii=N; ii>=_i; ii--) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ 
"MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 198 PetscScalar value; 199 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 200 int cstart = aij->cstart,cend = aij->cend,row,col; 201 PetscTruth roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 207 PetscScalar *aa = a->a; 208 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 211 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 212 PetscScalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 PetscScalar *ap; 217 218 PetscFunctionBegin; 219 for (i=0; i<m; i++) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 if (ignorezeroentries && value == 0.0) continue; 231 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ*)B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 if (ignorezeroentries && value == 0.0) continue; 260 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 261 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 262 } 263 } 264 } else { 265 if (!aij->donotstash) { 266 if (roworiented) { 267 if (ignorezeroentries && v[i*n] == 0.0) continue; 268 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 269 } else { 270 if (ignorezeroentries && v[i] == 0.0) continue; 271 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 272 } 273 } 274 } 275 } 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatGetValues_MPIAIJ" 281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 282 { 283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 284 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 285 int cstart = aij->cstart,cend = aij->cend,row,col; 286 287 PetscFunctionBegin; 288 for (i=0; i<m; i++) { 289 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 290 if (idxm[i] >= 
mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 291 if (idxm[i] >= rstart && idxm[i] < rend) { 292 row = idxm[i] - rstart; 293 for (j=0; j<n; j++) { 294 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 295 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 296 if (idxn[j] >= cstart && idxn[j] < cend){ 297 col = idxn[j] - cstart; 298 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 299 } else { 300 if (!aij->colmap) { 301 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 302 } 303 #if defined (PETSC_USE_CTABLE) 304 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 305 col --; 306 #else 307 col = aij->colmap[idxn[j]] - 1; 308 #endif 309 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 310 else { 311 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 312 } 313 } 314 } 315 } else { 316 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 317 } 318 } 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 325 { 326 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 327 int ierr,nstash,reallocs; 328 InsertMode addv; 329 330 PetscFunctionBegin; 331 if (aij->donotstash) { 332 PetscFunctionReturn(0); 333 } 334 335 /* make sure all processors are either in INSERTMODE or ADDMODE */ 336 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 337 if (addv == (ADD_VALUES|INSERT_VALUES)) { 338 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 339 } 340 mat->insertmode = addv; /* in case this processor had no cache */ 341 342 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 343 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 344 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 345 PetscFunctionReturn(0); 346 } 347 348 349 #undef __FUNCT__ 350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 352 { 353 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. 
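(A process "disassembles" by converting the off-diagonal block B back to global column numbering so that entries in new off-process columns can again be accepted; see DisAssemble_MPIAIJ.)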
*/ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 #if defined(PETSC_HAVE_SUPERLUDIST) 408 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 409 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 410 #endif 411 PetscFunctionReturn(0); 412 } 413 414 #undef __FUNCT__ 415 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 416 int MatZeroEntries_MPIAIJ(Mat A) 417 { 418 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 419 int ierr; 420 421 PetscFunctionBegin; 422 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 423 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatZeroRows_MPIAIJ" 429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 430 { 431 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 432 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 433 int *procs,*nprocs,j,idx,nsends,*work,row; 434 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 435 int *rvalues,tag = A->tag,count,base,slen,n,*source; 436 int *lens,imdex,*lrows,*values,rstart=l->rstart; 437 MPI_Comm comm = A->comm; 438 MPI_Request *send_waits,*recv_waits; 439 MPI_Status recv_status,*send_status; 440 IS istmp; 441 PetscTruth found; 442 443 PetscFunctionBegin; 444 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 445 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 446 447 /* first count number of contributors to each processor */ 448 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 449 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 450 procs = nprocs + size; 451 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 452 for (i=0; i<N; i++) { 453 idx = rows[i]; 454 found = PETSC_FALSE; 455 for (j=0; j<size; j++) { 456 if (idx >= owners[j] && idx < owners[j+1]) { 457 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 458 } 459 } 460 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 461 } 462 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 463 464 /* inform other processors of number of messages and max length*/ 465 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 466 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 467 nrecvs = work[size+rank]; 468 nmax = work[rank]; 469 ierr = PetscFree(work);CHKERRQ(ierr); 470 471 /* post receives: */ 472 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 474 for (i=0; i<nrecvs; i++) { 475 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 476 } 477 478 /* do sends: 479 1) starts[i] gives the starting index in svalues for stuff going to 480 the ith processor 481 */ 482 ierr = 
PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 483 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 484 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 485 starts[0] = 0; 486 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 487 for (i=0; i<N; i++) { 488 svalues[starts[owner[i]]++] = rows[i]; 489 } 490 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 491 492 starts[0] = 0; 493 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 494 count = 0; 495 for (i=0; i<size; i++) { 496 if (procs[i]) { 497 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 498 } 499 } 500 ierr = PetscFree(starts);CHKERRQ(ierr); 501 502 base = owners[rank]; 503 504 /* wait on receives */ 505 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 506 source = lens + nrecvs; 507 count = nrecvs; slen = 0; 508 while (count) { 509 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 510 /* unpack receives into our local space */ 511 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 512 source[imdex] = recv_status.MPI_SOURCE; 513 lens[imdex] = n; 514 slen += n; 515 count--; 516 } 517 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 518 519 /* move the data into the send scatter */ 520 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 521 count = 0; 522 for (i=0; i<nrecvs; i++) { 523 values = rvalues + i*nmax; 524 for (j=0; j<lens[i]; j++) { 525 lrows[count++] = values[j] - base; 526 } 527 } 528 ierr = PetscFree(rvalues);CHKERRQ(ierr); 529 ierr = PetscFree(lens);CHKERRQ(ierr); 530 ierr = PetscFree(owner);CHKERRQ(ierr); 531 ierr = PetscFree(nprocs);CHKERRQ(ierr); 532 533 /* actually zap the local rows */ 534 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 535 PetscLogObjectParent(A,istmp); 536 537 /* 538 Zero the required rows. If the "diagonal block" of the matrix 539 is square and the user wishes to set the diagonal we use separate 540 code so that MatSetValues() is not called for each diagonal entry, 541 which would allocate new memory, cause many mallocs, and slow things down. 
542 543 Contributed by: Mathew Knepley 544 */ 545 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 546 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 547 if (diag && (l->A->M == l->A->N)) { 548 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 549 } else if (diag) { 550 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 551 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 552 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 554 } 555 for (i = 0; i < slen; i++) { 556 row = lrows[i] + rstart; 557 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 558 } 559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 561 } else { 562 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 563 } 564 ierr = ISDestroy(istmp);CHKERRQ(ierr); 565 ierr = PetscFree(lrows);CHKERRQ(ierr); 566 567 /* wait on sends */ 568 if (nsends) { 569 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 570 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 571 ierr = PetscFree(send_status);CHKERRQ(ierr); 572 } 573 ierr = PetscFree(send_waits);CHKERRQ(ierr); 574 ierr = PetscFree(svalues);CHKERRQ(ierr); 575 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatMult_MPIAIJ" 581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 582 { 583 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 584 int ierr,nt; 585 586 PetscFunctionBegin; 587 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 588 if (nt != A->n) { 589 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 590 } 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultAdd_MPIAIJ" 600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 607 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 608 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 609 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 610 PetscFunctionReturn(0); 611 } 612 613 #undef __FUNCT__ 614 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 616 { 617 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 618 int ierr; 619 620 PetscFunctionBegin; 621 /* do nondiagonal part */ 622 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 623 /* send it on its way */ 624 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 625 /* do local part */ 626 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 627 /* receive remote parts: note this assumes the values are not actually */ 628 /* inserted in yy until the next line, which is true for my implementation*/ 629 /* but is not perhaps always true. 
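(If a scatter implementation added the remote contributions into yy before the local product overwrote yy, those contributions would be lost.)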
*/ 630 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 637 { 638 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 639 int ierr; 640 641 PetscFunctionBegin; 642 /* do nondiagonal part */ 643 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 644 /* send it on its way */ 645 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 646 /* do local part */ 647 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 648 /* receive remote parts: note this assumes the values are not actually */ 649 /* inserted in yy until the next line, which is true for my implementation*/ 650 /* but is not perhaps always true. */ 651 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 /* 656 This only works correctly for square matrices where the subblock A->A is the 657 diagonal block 658 */ 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 662 { 663 int ierr; 664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 665 666 PetscFunctionBegin; 667 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 668 if (a->rstart != a->cstart || a->rend != a->cend) { 669 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 670 } 671 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatScale_MPIAIJ" 677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 678 { 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 int ierr; 681 682 PetscFunctionBegin; 683 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 684 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatDestroy_MPIAIJ" 690 int MatDestroy_MPIAIJ(Mat mat) 691 { 692 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 693 int ierr; 694 695 PetscFunctionBegin; 696 #if defined(PETSC_USE_LOG) 697 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 698 #endif 699 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 700 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 701 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 702 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 703 #if defined (PETSC_USE_CTABLE) 704 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 705 #else 706 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 707 #endif 708 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 709 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 710 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 711 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 712 ierr = PetscFree(aij);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 718 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 722 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 723 PetscTruth isdraw,isascii,flg; 724 PetscViewer sviewer; 725 PetscViewerFormat format; 726 727 PetscFunctionBegin; 728 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 730 if (isascii) { 731 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 732 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 733 MatInfo info; 734 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 735 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 736 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 737 if (flg) { 738 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 739 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 740 } else { 741 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 742 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 743 } 744 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 745 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 746 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 748 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 749 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } else if (format == PETSC_VIEWER_ASCII_INFO) { 752 PetscFunctionReturn(0); 753 } 754 } else if (isdraw) { 755 PetscDraw draw; 756 PetscTruth isnull; 757 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 758 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 759 } 760 761 if (size == 1) { 762 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 763 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 764 } else { 765 /* assemble the entire matrix onto first processor. 
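This is inherently not scalable and is intended only for viewing or debugging matrices of modest size.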
*/ 766 Mat A; 767 Mat_SeqAIJ *Aloc; 768 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 769 PetscScalar *a; 770 771 if (!rank) { 772 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 773 } else { 774 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 775 } 776 PetscLogObjectParent(mat,A); 777 778 /* copy over the A part */ 779 Aloc = (Mat_SeqAIJ*)aij->A->data; 780 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 781 row = aij->rstart; 782 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 783 for (i=0; i<m; i++) { 784 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 785 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 786 } 787 aj = Aloc->j; 788 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 789 790 /* copy over the B part */ 791 Aloc = (Mat_SeqAIJ*)aij->B->data; 792 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 793 row = aij->rstart; 794 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 795 ct = cols; 796 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 797 for (i=0; i<m; i++) { 798 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 799 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 800 } 801 ierr = PetscFree(ct);CHKERRQ(ierr); 802 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 803 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 804 /* 805 Everyone has to call to draw the matrix since the graphics waits are 806 synchronized across all processors that share the PetscDraw object 807 */ 808 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 809 if (!rank) { 810 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 811 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 812 } 813 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 814 ierr = MatDestroy(A);CHKERRQ(ierr); 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatView_MPIAIJ" 821 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 822 { 823 int ierr; 824 PetscTruth isascii,isdraw,issocket,isbinary; 825 826 PetscFunctionBegin; 827 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 828 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 829 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 830 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 831 if (isascii || isdraw || isbinary || issocket) { 832 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 833 } else { 834 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatRelax_MPIAIJ" 843 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 846 int ierr; 847 Vec bb1; 848 PetscScalar mone=-1.0; 849 850 PetscFunctionBegin; 851 852 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 853 854 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 855 if (flag & SOR_ZERO_INITIAL_GUESS) { 856 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 857 its--; 858 } 859 860 while (its--) { 861 ierr 
= VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 862 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 863 864 /* update rhs: bb1 = bb - B*x */ 865 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 866 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 867 868 /* local sweep */ 869 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx); 870 CHKERRQ(ierr); 871 } 872 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 873 if (flag & SOR_ZERO_INITIAL_GUESS) { 874 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 875 its--; 876 } 877 while (its--) { 878 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 879 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 880 881 /* update rhs: bb1 = bb - B*x */ 882 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 883 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 884 885 /* local sweep */ 886 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 887 CHKERRQ(ierr); 888 } 889 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 890 if (flag & SOR_ZERO_INITIAL_GUESS) { 891 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 892 its--; 893 } 894 while (its--) { 895 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 896 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 897 898 /* update rhs: bb1 = bb - B*x */ 899 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 900 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 901 902 /* local sweep */ 903 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 904 CHKERRQ(ierr); 905 } 906 } else { 907 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 908 } 909 910 ierr = VecDestroy(bb1);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 #undef __FUNCT__ 915 #define __FUNCT__ "MatGetInfo_MPIAIJ" 916 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 917 { 918 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 919 Mat A = mat->A,B = mat->B; 920 int ierr; 921 PetscReal isend[5],irecv[5]; 922 923 PetscFunctionBegin; 924 info->block_size = 1.0; 925 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 926 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 927 isend[3] = info->memory; isend[4] = info->mallocs; 928 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 929 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 930 isend[3] += info->memory; isend[4] += info->mallocs; 931 if (flag == MAT_LOCAL) { 932 info->nz_used = isend[0]; 933 info->nz_allocated = isend[1]; 934 info->nz_unneeded = isend[2]; 935 info->memory = isend[3]; 936 info->mallocs = isend[4]; 937 } else if (flag == MAT_GLOBAL_MAX) { 938 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 939 info->nz_used = irecv[0]; 940 info->nz_allocated = irecv[1]; 941 info->nz_unneeded = irecv[2]; 942 info->memory = irecv[3]; 943 info->mallocs = irecv[4]; 944 } else if (flag == MAT_GLOBAL_SUM) { 945 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 946 info->nz_used = irecv[0]; 947 info->nz_allocated = irecv[1]; 948 info->nz_unneeded = irecv[2]; 949 
info->memory = irecv[3]; 950 info->mallocs = irecv[4]; 951 } 952 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 953 info->fill_ratio_needed = 0; 954 info->factor_mallocs = 0; 955 info->rows_global = (double)matin->M; 956 info->columns_global = (double)matin->N; 957 info->rows_local = (double)matin->m; 958 info->columns_local = (double)matin->N; 959 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatSetOption_MPIAIJ" 965 int MatSetOption_MPIAIJ(Mat A,MatOption op) 966 { 967 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 968 int ierr; 969 970 PetscFunctionBegin; 971 switch (op) { 972 case MAT_NO_NEW_NONZERO_LOCATIONS: 973 case MAT_YES_NEW_NONZERO_LOCATIONS: 974 case MAT_COLUMNS_UNSORTED: 975 case MAT_COLUMNS_SORTED: 976 case MAT_NEW_NONZERO_ALLOCATION_ERR: 977 case MAT_KEEP_ZEROED_ROWS: 978 case MAT_NEW_NONZERO_LOCATION_ERR: 979 case MAT_USE_INODES: 980 case MAT_DO_NOT_USE_INODES: 981 case MAT_IGNORE_ZERO_ENTRIES: 982 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 983 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 984 break; 985 case MAT_ROW_ORIENTED: 986 a->roworiented = PETSC_TRUE; 987 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 988 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 989 break; 990 case MAT_ROWS_SORTED: 991 case MAT_ROWS_UNSORTED: 992 case MAT_YES_NEW_DIAGONALS: 993 case MAT_USE_SINGLE_PRECISION_SOLVES: 994 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 995 break; 996 case MAT_COLUMN_ORIENTED: 997 a->roworiented = PETSC_FALSE; 998 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 999 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1000 break; 1001 case MAT_IGNORE_OFF_PROC_ENTRIES: 1002 a->donotstash = PETSC_TRUE; 1003 break; 1004 case MAT_NO_NEW_DIAGONALS: 1005 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1006 break; 1007 default: 1008 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1009 break; 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatGetRow_MPIAIJ" 1016 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1020 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1021 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1022 int *cmap,*idx_p; 1023 1024 PetscFunctionBegin; 1025 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1026 mat->getrowactive = PETSC_TRUE; 1027 1028 if (!mat->rowvalues && (idx || v)) { 1029 /* 1030 allocate enough space to hold information from the longest row. 
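(The single buffer holds both the values and the column indices handed back by MatGetRow; rowindices points just past the values.)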
1031 */ 1032 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1033 int max = 1,tmp; 1034 for (i=0; i<matin->m; i++) { 1035 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1036 if (max < tmp) { max = tmp; } 1037 } 1038 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1039 mat->rowindices = (int*)(mat->rowvalues + max); 1040 } 1041 1042 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1043 lrow = row - rstart; 1044 1045 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1046 if (!v) {pvA = 0; pvB = 0;} 1047 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1048 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1049 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1050 nztot = nzA + nzB; 1051 1052 cmap = mat->garray; 1053 if (v || idx) { 1054 if (nztot) { 1055 /* Sort by increasing column numbers, assuming A and B already sorted */ 1056 int imark = -1; 1057 if (v) { 1058 *v = v_p = mat->rowvalues; 1059 for (i=0; i<nzB; i++) { 1060 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1061 else break; 1062 } 1063 imark = i; 1064 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1065 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1066 } 1067 if (idx) { 1068 *idx = idx_p = mat->rowindices; 1069 if (imark > -1) { 1070 for (i=0; i<imark; i++) { 1071 idx_p[i] = cmap[cworkB[i]]; 1072 } 1073 } else { 1074 for (i=0; i<nzB; i++) { 1075 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1076 else break; 1077 } 1078 imark = i; 1079 } 1080 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1081 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1082 } 1083 } else { 1084 if (idx) *idx = 0; 1085 if (v) *v = 0; 1086 } 1087 } 1088 *nz = nztot; 1089 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1090 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1096 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1097 { 1098 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1099 1100 PetscFunctionBegin; 1101 if (aij->getrowactive == PETSC_FALSE) { 1102 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1103 } 1104 aij->getrowactive = PETSC_FALSE; 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatNorm_MPIAIJ" 1110 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1111 { 1112 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1113 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1114 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1115 PetscReal sum = 0.0; 1116 PetscScalar *v; 1117 1118 PetscFunctionBegin; 1119 if (aij->size == 1) { 1120 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1121 } else { 1122 if (type == NORM_FROBENIUS) { 1123 v = amat->a; 1124 for (i=0; i<amat->nz; i++) { 1125 #if defined(PETSC_USE_COMPLEX) 1126 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1127 #else 1128 sum += (*v)*(*v); v++; 1129 #endif 1130 } 1131 v = bmat->a; 1132 for (i=0; i<bmat->nz; i++) { 1133 #if defined(PETSC_USE_COMPLEX) 1134 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1135 #else 1136 sum += (*v)*(*v); v++; 1137 #endif 1138 } 1139 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1140 *norm = sqrt(*norm); 1141 } else if (type == NORM_1) { /* max column norm */ 1142 
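      /* accumulate |a_ij| into a length-N array of column sums, reduce the sums across processes, then take the largest */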
PetscReal *tmp,*tmp2; 1143 int *jj,*garray = aij->garray; 1144 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1145 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1146 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1147 *norm = 0.0; 1148 v = amat->a; jj = amat->j; 1149 for (j=0; j<amat->nz; j++) { 1150 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1151 } 1152 v = bmat->a; jj = bmat->j; 1153 for (j=0; j<bmat->nz; j++) { 1154 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1155 } 1156 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1157 for (j=0; j<mat->N; j++) { 1158 if (tmp2[j] > *norm) *norm = tmp2[j]; 1159 } 1160 ierr = PetscFree(tmp);CHKERRQ(ierr); 1161 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1162 } else if (type == NORM_INFINITY) { /* max row norm */ 1163 PetscReal ntemp = 0.0; 1164 for (j=0; j<aij->A->m; j++) { 1165 v = amat->a + amat->i[j] + shift; 1166 sum = 0.0; 1167 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1168 sum += PetscAbsScalar(*v); v++; 1169 } 1170 v = bmat->a + bmat->i[j] + shift; 1171 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1172 sum += PetscAbsScalar(*v); v++; 1173 } 1174 if (sum > ntemp) ntemp = sum; 1175 } 1176 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1177 } else { 1178 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1179 } 1180 } 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatTranspose_MPIAIJ" 1186 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1187 { 1188 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1189 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1190 int ierr,shift = Aloc->indexshift; 1191 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1192 Mat B; 1193 PetscScalar *array; 1194 1195 PetscFunctionBegin; 1196 if (!matout && M != N) { 1197 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1198 } 1199 1200 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1201 1202 /* copy over the A part */ 1203 Aloc = (Mat_SeqAIJ*)a->A->data; 1204 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1205 row = a->rstart; 1206 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1207 for (i=0; i<m; i++) { 1208 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1209 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1210 } 1211 aj = Aloc->j; 1212 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1213 1214 /* copy over the B part */ 1215 Aloc = (Mat_SeqAIJ*)a->B->data; 1216 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1217 row = a->rstart; 1218 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1219 ct = cols; 1220 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1221 for (i=0; i<m; i++) { 1222 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1223 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1224 } 1225 ierr = PetscFree(ct);CHKERRQ(ierr); 1226 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1227 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1228 if (matout) { 1229 *matout = B; 1230 } else { 1231 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1232 } 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1238 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1241 Mat a = aij->A,b 
= aij->B; 1242 int ierr,s1,s2,s3; 1243 1244 PetscFunctionBegin; 1245 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1246 if (rr) { 1247 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1248 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1249 /* Overlap communication with computation. */ 1250 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1251 } 1252 if (ll) { 1253 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1254 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1255 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1256 } 1257 /* scale the diagonal block */ 1258 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1259 1260 if (rr) { 1261 /* Do a scatter end and then right scale the off-diagonal block */ 1262 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1263 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1264 } 1265 1266 PetscFunctionReturn(0); 1267 } 1268 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1272 int MatPrintHelp_MPIAIJ(Mat A) 1273 { 1274 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1275 int ierr; 1276 1277 PetscFunctionBegin; 1278 if (!a->rank) { 1279 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1286 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1287 { 1288 PetscFunctionBegin; 1289 *bs = 1; 1290 PetscFunctionReturn(0); 1291 } 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1294 int MatSetUnfactored_MPIAIJ(Mat A) 1295 { 1296 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1297 int ierr; 1298 1299 PetscFunctionBegin; 1300 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatEqual_MPIAIJ" 1306 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1307 { 1308 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1309 Mat a,b,c,d; 1310 PetscTruth flg; 1311 int ierr; 1312 1313 PetscFunctionBegin; 1314 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1315 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1316 a = matA->A; b = matA->B; 1317 c = matB->A; d = matB->B; 1318 1319 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1320 if (flg == PETSC_TRUE) { 1321 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1322 } 1323 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatCopy_MPIAIJ" 1329 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1330 { 1331 int ierr; 1332 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1333 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1334 PetscTruth flg; 1335 1336 PetscFunctionBegin; 1337 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1338 if (str != SAME_NONZERO_PATTERN || !flg) { 1339 /* because of the column compression in the off-processor part of the matrix a->B, 1340 the number of columns in a->B and b->B may be different, hence we cannot call 1341 the MatCopy() directly on the two parts. 
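(Each process stores in B only the global columns in which it actually has nonzeros, renumbered locally through garray, so the two B blocks generally have different column dimensions.)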
If need be, we can provide a more 1342 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1343 then copying the submatrices */ 1344 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1345 } else { 1346 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1347 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1354 int MatSetUpPreallocation_MPIAIJ(Mat A) 1355 { 1356 int ierr; 1357 1358 PetscFunctionBegin; 1359 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1364 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1365 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1366 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1367 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1368 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1369 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1370 #endif 1371 1372 /* -------------------------------------------------------------------*/ 1373 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1374 MatGetRow_MPIAIJ, 1375 MatRestoreRow_MPIAIJ, 1376 MatMult_MPIAIJ, 1377 MatMultAdd_MPIAIJ, 1378 MatMultTranspose_MPIAIJ, 1379 MatMultTransposeAdd_MPIAIJ, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 MatRelax_MPIAIJ, 1387 MatTranspose_MPIAIJ, 1388 MatGetInfo_MPIAIJ, 1389 MatEqual_MPIAIJ, 1390 MatGetDiagonal_MPIAIJ, 1391 MatDiagonalScale_MPIAIJ, 1392 MatNorm_MPIAIJ, 1393 MatAssemblyBegin_MPIAIJ, 1394 MatAssemblyEnd_MPIAIJ, 1395 0, 1396 MatSetOption_MPIAIJ, 1397 MatZeroEntries_MPIAIJ, 1398 MatZeroRows_MPIAIJ, 1399 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1400 MatLUFactorSymbolic_MPIAIJ_TFS, 1401 #else 1402 0, 1403 #endif 1404 0, 1405 0, 1406 0, 1407 MatSetUpPreallocation_MPIAIJ, 1408 0, 1409 0, 1410 0, 1411 0, 1412 MatDuplicate_MPIAIJ, 1413 0, 1414 0, 1415 0, 1416 0, 1417 0, 1418 MatGetSubMatrices_MPIAIJ, 1419 MatIncreaseOverlap_MPIAIJ, 1420 MatGetValues_MPIAIJ, 1421 MatCopy_MPIAIJ, 1422 MatPrintHelp_MPIAIJ, 1423 MatScale_MPIAIJ, 1424 0, 1425 0, 1426 0, 1427 MatGetBlockSize_MPIAIJ, 1428 0, 1429 0, 1430 0, 1431 0, 1432 MatFDColoringCreate_MPIAIJ, 1433 0, 1434 MatSetUnfactored_MPIAIJ, 1435 0, 1436 0, 1437 MatGetSubMatrix_MPIAIJ, 1438 MatDestroy_MPIAIJ, 1439 MatView_MPIAIJ, 1440 MatGetPetscMaps_Petsc, 1441 0, 1442 0, 1443 0, 1444 0, 1445 0, 1446 0, 1447 0, 1448 0, 1449 MatSetColoring_MPIAIJ, 1450 MatSetValuesAdic_MPIAIJ, 1451 MatSetValuesAdifor_MPIAIJ 1452 }; 1453 1454 /* ----------------------------------------------------------------------------------------*/ 1455 1456 EXTERN_C_BEGIN 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1459 int MatStoreValues_MPIAIJ(Mat mat) 1460 { 1461 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1462 int ierr; 1463 1464 PetscFunctionBegin; 1465 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1466 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } 1469 EXTERN_C_END 1470 1471 EXTERN_C_BEGIN 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1474 int MatRetrieveValues_MPIAIJ(Mat mat) 1475 { 1476 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1477 int ierr; 1478 1479 PetscFunctionBegin; 1480 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1481 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 EXTERN_C_END 1485 1486 #include "petscpc.h" 1487 EXTERN_C_BEGIN 1488 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1489 EXTERN_C_END 1490 1491 EXTERN_C_BEGIN 1492 #undef __FUNCT__ 1493 #define __FUNCT__ "MatCreate_MPIAIJ" 1494 int MatCreate_MPIAIJ(Mat B) 1495 { 1496 Mat_MPIAIJ *b; 1497 int ierr,i,size; 1498 #if defined(PETSC_HAVE_SUPERLUDIST) 1499 PetscTruth flg; 1500 #endif 1501 1502 PetscFunctionBegin; 1503 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1504 1505 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1506 B->data = (void*)b; 1507 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1508 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1509 B->factor = 0; 1510 B->assembled = PETSC_FALSE; 1511 B->mapping = 0; 1512 1513 B->insertmode = NOT_SET_VALUES; 1514 b->size = size; 1515 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1516 1517 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1518 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1519 1520 /* the information in the maps duplicates the information computed below, eventually 1521 we should remove the duplicate information that is not contained in the maps */ 1522 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1523 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1524 1525 /* build local table of row and column ownerships */ 1526 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1527 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1528 b->cowners = b->rowners + b->size + 2; 1529 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1530 b->rowners[0] = 0; 1531 for (i=2; i<=b->size; i++) { 1532 b->rowners[i] += b->rowners[i-1]; 1533 } 1534 b->rstart = b->rowners[b->rank]; 1535 b->rend = b->rowners[b->rank+1]; 1536 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1537 b->cowners[0] = 0; 1538 for (i=2; i<=b->size; i++) { 1539 b->cowners[i] += b->cowners[i-1]; 1540 } 1541 b->cstart = b->cowners[b->rank]; 1542 b->cend = b->cowners[b->rank+1]; 1543 1544 /* build cache for off array entries formed */ 1545 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1546 b->donotstash = PETSC_FALSE; 1547 b->colmap = 0; 1548 b->garray = 0; 1549 b->roworiented = PETSC_TRUE; 1550 1551 /* stuff used for matrix vector multiply */ 1552 b->lvec = PETSC_NULL; 1553 b->Mvctx = PETSC_NULL; 1554 1555 /* stuff for MatGetRow() */ 1556 b->rowindices = 0; 1557 b->rowvalues = 0; 1558 b->getrowactive = PETSC_FALSE; 1559 1560 /* #if defined(PETSC_HAVE_SUPERLUDIST) 1561 1562 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr); 1563 if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); } 1564 #endif */ 1565 1566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1567 "MatStoreValues_MPIAIJ", 1568 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1569 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1570 "MatRetrieveValues_MPIAIJ", 1571 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1572 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1573 "MatGetDiagonalBlock_MPIAIJ", 1574 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1575 1576 PetscFunctionReturn(0); 1577 } 1578 EXTERN_C_END 1579 1580 #undef __FUNCT__ 
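/*
     A minimal usage sketch (illustration only, not compiled here): once a MATMPIAIJ
     matrix exists, a caller typically preallocates, inserts values, and assembles.
     The sizes m,n,M,N and the preallocation counts 5 and 2 below are assumed
     placeholder values, not taken from this file.

        Mat         C;
        int         ierr,row,col;
        PetscScalar value;

        ierr = MatCreateMPIAIJ(comm,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&C);CHKERRQ(ierr);
        ierr = MatSetValues(C,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
        ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     Rows owned by other processes may be set as well; those entries are stashed and
     communicated during assembly, and INSERT_VALUES and ADD_VALUES must not be mixed
     across processes (see MatAssemblyBegin_MPIAIJ above).
*/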
1581 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1582 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1583 { 1584 Mat mat; 1585 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1586 int ierr; 1587 1588 PetscFunctionBegin; 1589 *newmat = 0; 1590 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1591 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1592 a = (Mat_MPIAIJ*)mat->data; 1593 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1594 mat->factor = matin->factor; 1595 mat->assembled = PETSC_TRUE; 1596 mat->insertmode = NOT_SET_VALUES; 1597 mat->preallocated = PETSC_TRUE; 1598 1599 a->rstart = oldmat->rstart; 1600 a->rend = oldmat->rend; 1601 a->cstart = oldmat->cstart; 1602 a->cend = oldmat->cend; 1603 a->size = oldmat->size; 1604 a->rank = oldmat->rank; 1605 a->donotstash = oldmat->donotstash; 1606 a->roworiented = oldmat->roworiented; 1607 a->rowindices = 0; 1608 a->rowvalues = 0; 1609 a->getrowactive = PETSC_FALSE; 1610 1611 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1612 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1613 if (oldmat->colmap) { 1614 #if defined (PETSC_USE_CTABLE) 1615 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1616 #else 1617 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1618 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1619 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1620 #endif 1621 } else a->colmap = 0; 1622 if (oldmat->garray) { 1623 int len; 1624 len = oldmat->B->n; 1625 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1626 PetscLogObjectMemory(mat,len*sizeof(int)); 1627 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1628 } else a->garray = 0; 1629 1630 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1631 PetscLogObjectParent(mat,a->lvec); 1632 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1633 PetscLogObjectParent(mat,a->Mvctx); 1634 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1635 PetscLogObjectParent(mat,a->A); 1636 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1637 PetscLogObjectParent(mat,a->B); 1638 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1639 *newmat = mat; 1640 PetscFunctionReturn(0); 1641 } 1642 1643 #include "petscsys.h" 1644 1645 EXTERN_C_BEGIN 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "MatLoad_MPIAIJ" 1648 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1649 { 1650 Mat A; 1651 PetscScalar *vals,*svals; 1652 MPI_Comm comm = ((PetscObject)viewer)->comm; 1653 MPI_Status status; 1654 int i,nz,ierr,j,rstart,rend,fd; 1655 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1656 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1657 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1658 1659 PetscFunctionBegin; 1660 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1661 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1662 if (!rank) { 1663 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1664 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1665 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1666 if (header[3] < 0) { 1667 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1668 } 1669 } 1670 1671 
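  /* broadcast the header entries (M, N, total nonzeros) from process 0 to everyone */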
ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1672 M = header[1]; N = header[2]; 1673 /* determine ownership of all rows */ 1674 m = M/size + ((M % size) > rank); 1675 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1676 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1677 rowners[0] = 0; 1678 for (i=2; i<=size; i++) { 1679 rowners[i] += rowners[i-1]; 1680 } 1681 rstart = rowners[rank]; 1682 rend = rowners[rank+1]; 1683 1684 /* distribute row lengths to all processors */ 1685 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1686 offlens = ourlens + (rend-rstart); 1687 if (!rank) { 1688 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1689 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1690 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1691 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1692 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1693 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1694 } else { 1695 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1696 } 1697 1698 if (!rank) { 1699 /* calculate the number of nonzeros on each processor */ 1700 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1701 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1702 for (i=0; i<size; i++) { 1703 for (j=rowners[i]; j< rowners[i+1]; j++) { 1704 procsnz[i] += rowlengths[j]; 1705 } 1706 } 1707 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1708 1709 /* determine max buffer needed and allocate it */ 1710 maxnz = 0; 1711 for (i=0; i<size; i++) { 1712 maxnz = PetscMax(maxnz,procsnz[i]); 1713 } 1714 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1715 1716 /* read in my part of the matrix column indices */ 1717 nz = procsnz[0]; 1718 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1719 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1720 1721 /* read in every one elses and ship off */ 1722 for (i=1; i<size; i++) { 1723 nz = procsnz[i]; 1724 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1725 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1726 } 1727 ierr = PetscFree(cols);CHKERRQ(ierr); 1728 } else { 1729 /* determine buffer space needed for message */ 1730 nz = 0; 1731 for (i=0; i<m; i++) { 1732 nz += ourlens[i]; 1733 } 1734 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1735 1736 /* receive message of column indices*/ 1737 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1738 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1739 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1740 } 1741 1742 /* determine column ownership if matrix is not square */ 1743 if (N != M) { 1744 n = N/size + ((N % size) > rank); 1745 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1746 cstart = cend - n; 1747 } else { 1748 cstart = rstart; 1749 cend = rend; 1750 n = cend - cstart; 1751 } 1752 1753 /* loop over local rows, determining number of off diagonal entries */ 1754 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1755 jj = 0; 1756 for (i=0; i<m; i++) { 1757 for (j=0; j<ourlens[i]; j++) { 1758 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1759 jj++; 1760 } 1761 } 1762 1763 /* create our matrix */ 1764 for (i=0; i<m; i++) { 1765 ourlens[i] -= offlens[i]; 1766 } 1767 ierr = 
MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1768 A = *newmat; 1769 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1770 for (i=0; i<m; i++) { 1771 ourlens[i] += offlens[i]; 1772 } 1773 1774 if (!rank) { 1775 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1776 1777 /* read in my part of the matrix numerical values */ 1778 nz = procsnz[0]; 1779 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1780 1781 /* insert into matrix */ 1782 jj = rstart; 1783 smycols = mycols; 1784 svals = vals; 1785 for (i=0; i<m; i++) { 1786 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1787 smycols += ourlens[i]; 1788 svals += ourlens[i]; 1789 jj++; 1790 } 1791 1792 /* read in other processors and ship out */ 1793 for (i=1; i<size; i++) { 1794 nz = procsnz[i]; 1795 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1796 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1797 } 1798 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1799 } else { 1800 /* receive numeric values */ 1801 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1802 1803 /* receive message of values*/ 1804 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1805 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1806 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1807 1808 /* insert into matrix */ 1809 jj = rstart; 1810 smycols = mycols; 1811 svals = vals; 1812 for (i=0; i<m; i++) { 1813 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1814 smycols += ourlens[i]; 1815 svals += ourlens[i]; 1816 jj++; 1817 } 1818 } 1819 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1820 ierr = PetscFree(vals);CHKERRQ(ierr); 1821 ierr = PetscFree(mycols);CHKERRQ(ierr); 1822 ierr = PetscFree(rowners);CHKERRQ(ierr); 1823 1824 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1825 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 EXTERN_C_END 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 1832 /* 1833 Not great since it makes two copies of the submatrix, first an SeqAIJ 1834 in local and then by concatenating the local matrices the end result. 
1835 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1836 */ 1837 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1838 { 1839 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1840 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1841 Mat *local,M,Mreuse; 1842 PetscScalar *vwork,*aa; 1843 MPI_Comm comm = mat->comm; 1844 Mat_SeqAIJ *aij; 1845 1846 1847 PetscFunctionBegin; 1848 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1849 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1850 1851 if (call == MAT_REUSE_MATRIX) { 1852 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1853 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1854 local = &Mreuse; 1855 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1856 } else { 1857 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1858 Mreuse = *local; 1859 ierr = PetscFree(local);CHKERRQ(ierr); 1860 } 1861 1862 /* 1863 m - number of local rows 1864 n - number of columns (same on all processors) 1865 rstart - first row in new global matrix generated 1866 */ 1867 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1868 if (call == MAT_INITIAL_MATRIX) { 1869 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1870 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1871 ii = aij->i; 1872 jj = aij->j; 1873 1874 /* 1875 Determine the number of non-zeros in the diagonal and off-diagonal 1876 portions of the matrix in order to do correct preallocation 1877 */ 1878 1879 /* first get start and end of "diagonal" columns */ 1880 if (csize == PETSC_DECIDE) { 1881 nlocal = n/size + ((n % size) > rank); 1882 } else { 1883 nlocal = csize; 1884 } 1885 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1886 rstart = rend - nlocal; 1887 if (rank == size - 1 && rend != n) { 1888 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1889 } 1890 1891 /* next, compute all the lengths */ 1892 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1893 olens = dlens + m; 1894 for (i=0; i<m; i++) { 1895 jend = ii[i+1] - ii[i]; 1896 olen = 0; 1897 dlen = 0; 1898 for (j=0; j<jend; j++) { 1899 if (*jj < rstart || *jj >= rend) olen++; 1900 else dlen++; 1901 jj++; 1902 } 1903 olens[i] = olen; 1904 dlens[i] = dlen; 1905 } 1906 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1907 ierr = PetscFree(dlens);CHKERRQ(ierr); 1908 } else { 1909 int ml,nl; 1910 1911 M = *newmat; 1912 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1913 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1914 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1915 /* 1916 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1917 rather than the slower MatSetValues(). 
1918 */ 1919 M->was_assembled = PETSC_TRUE; 1920 M->assembled = PETSC_FALSE; 1921 } 1922 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1923 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1924 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1925 ii = aij->i; 1926 jj = aij->j; 1927 aa = aij->a; 1928 for (i=0; i<m; i++) { 1929 row = rstart + i; 1930 nz = ii[i+1] - ii[i]; 1931 cwork = jj; jj += nz; 1932 vwork = aa; aa += nz; 1933 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1934 } 1935 1936 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1937 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1938 *newmat = M; 1939 1940 /* save submatrix used in processor for next request */ 1941 if (call == MAT_INITIAL_MATRIX) { 1942 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1943 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1944 } 1945 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatMPIAIJSetPreallocation" 1951 /*@C 1952 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 1953 (the default parallel PETSc format). For good matrix assembly performance 1954 the user should preallocate the matrix storage by setting the parameters 1955 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1956 performance can be increased by more than a factor of 50. 1957 1958 Collective on MPI_Comm 1959 1960 Input Parameters: 1961 + B - the matrix 1962 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1963 (same value is used for all local rows) 1964 . d_nnz - array containing the number of nonzeros in the various rows of the 1965 DIAGONAL portion of the local submatrix (possibly different for each row) 1966 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1967 The size of this array is equal to the number of local rows, i.e. 'm'. 1968 You must leave room for the diagonal entry even if it is zero. 1969 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1970 submatrix (same value is used for all local rows). 1971 - o_nnz - array containing the number of nonzeros in the various rows of the 1972 OFF-DIAGONAL portion of the local submatrix (possibly different for 1973 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1974 structure. The size of this array is equal to the number 1975 of local rows, i.e. 'm'. 1976 1977 The AIJ format (also called the Yale sparse matrix format or 1978 compressed row storage) is fully compatible with standard Fortran 77 1979 storage. That is, the stored row and column indices can begin at 1980 either one (as in Fortran) or zero. See the users manual for details. 1981 1982 The user MUST specify either the local or global matrix dimensions 1983 (possibly both). 1984 1985 The parallel matrix is partitioned such that the first m0 rows belong to 1986 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1987 to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'. 1988 1989 The DIAGONAL portion of the local submatrix of a processor can be defined 1990 as the submatrix which is obtained by extracting the part corresponding 1991 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1992 first row that belongs to the processor, and r2 is the last row belonging 1993 to this processor. This is a square mxm matrix.
The remaining portion 1994 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 1995 1996 If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored. 1997 1998 By default, this format uses inodes (identical nodes) when possible. 1999 We search for consecutive rows with the same nonzero structure, thereby 2000 reusing matrix information to achieve increased efficiency. 2001 2002 Options Database Keys: 2003 + -mat_aij_no_inode - Do not use inodes 2004 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2005 - -mat_aij_oneindex - Internally use indexing starting at 1 2006 rather than 0. Note that when calling MatSetValues(), 2007 the user still MUST index entries starting at 0! 2008 2009 Example usage: 2010 2011 Consider the following 8x8 matrix with 34 non-zero values that is 2012 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2013 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2014 as follows: 2015 2016 .vb 2017 1 2 0 | 0 3 0 | 0 4 2018 Proc0 0 5 6 | 7 0 0 | 8 0 2019 9 0 10 | 11 0 0 | 12 0 2020 ------------------------------------- 2021 13 0 14 | 15 16 17 | 0 0 2022 Proc1 0 18 0 | 19 20 21 | 0 0 2023 0 0 0 | 22 23 0 | 24 0 2024 ------------------------------------- 2025 Proc2 25 26 27 | 0 0 28 | 29 0 2026 30 0 0 | 31 32 33 | 0 34 2027 .ve 2028 2029 This can be represented as a collection of submatrices as: 2030 2031 .vb 2032 A B C 2033 D E F 2034 G H I 2035 .ve 2036 2037 Here the submatrices A,B,C are owned by proc0, D,E,F are 2038 owned by proc1, and G,H,I are owned by proc2. 2039 2040 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2041 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2042 The 'M','N' parameters are 8,8, and have the same values on all procs. 2043 2044 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2045 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2046 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2047 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2048 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2049 matrix and [DF] as another SeqAIJ matrix. 2050 2051 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2052 allocated for every row of the local diagonal submatrix, and o_nz 2053 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2054 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2055 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2056 In this case, the values of d_nz,o_nz are: 2057 .vb 2058 proc0 : d_nz = 2, o_nz = 2 2059 proc1 : d_nz = 3, o_nz = 2 2060 proc2 : d_nz = 1, o_nz = 4 2061 .ve 2062 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2063 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2064 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2065 34 values. 2066 2067 When the d_nnz, o_nnz parameters are specified, the storage is specified 2068 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2069 In the above case the values for d_nnz,o_nnz are: 2070 .vb 2071 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2072 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2073 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2074 .ve 2075 Here the space allocated is the sum of all the above values, i.e. 34, and 2076 hence the preallocation is perfect.
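   A typical call sequence is sketched below (the constant counts 5 and 2 are
   placeholders for application-specific estimates, not values taken from the
   example above):
.vb
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve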
2077 2078 Level: intermediate 2079 2080 .keywords: matrix, aij, compressed row, sparse, parallel 2081 2082 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2083 @*/ 2084 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2085 { 2086 Mat_MPIAIJ *b; 2087 int ierr,i; 2088 PetscTruth flg2; 2089 2090 PetscFunctionBegin; 2091 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2092 if (!flg2) PetscFunctionReturn(0); 2093 B->preallocated = PETSC_TRUE; 2094 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2095 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2096 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2097 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2098 if (d_nnz) { 2099 for (i=0; i<B->m; i++) { 2100 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2101 } 2102 } 2103 if (o_nnz) { 2104 for (i=0; i<B->m; i++) { 2105 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2106 } 2107 } 2108 b = (Mat_MPIAIJ*)B->data; 2109 2110 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2111 PetscLogObjectParent(B,b->A); 2112 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2113 PetscLogObjectParent(B,b->B); 2114 2115 PetscFunctionReturn(0); 2116 } 2117 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatCreateMPIAIJ" 2120 /*@C 2121 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2122 (the default parallel PETSc format). For good matrix assembly performance 2123 the user should preallocate the matrix storage by setting the parameters 2124 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2125 performance can be increased by more than a factor of 50. 2126 2127 Collective on MPI_Comm 2128 2129 Input Parameters: 2130 + comm - MPI communicator 2131 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2132 This value should be the same as the local size used in creating the 2133 y vector for the matrix-vector product y = Ax. 2134 . n - This value should be the same as the local size used in creating the 2135 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2136 calculated if N is given) For square matrices n is almost always m. 2137 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2138 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2139 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2140 (same value is used for all local rows) 2141 . d_nnz - array containing the number of nonzeros in the various rows of the 2142 DIAGONAL portion of the local submatrix (possibly different for each row) 2143 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2144 The size of this array is equal to the number of local rows, i.e 'm'. 2145 You must leave room for the diagonal entry even if it is zero. 2146 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2147 submatrix (same value is used for all local rows). 2148 - o_nnz - array containing the number of nonzeros in the various rows of the 2149 OFF-DIAGONAL portion of the local submatrix (possibly different for 2150 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2151 structure. 
The size of this array is equal to the number 2152 of local rows, i.e. 'm'. 2153 2154 Output Parameter: 2155 . A - the matrix 2156 2157 Notes: 2158 The m,n,M,N parameters specify the size of the matrix and its partitioning across 2159 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2160 storage requirements for this matrix. 2161 2162 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2163 processor then it must be used on all processors that share the object for 2164 that argument. 2165 2166 The AIJ format (also called the Yale sparse matrix format or 2167 compressed row storage) is fully compatible with standard Fortran 77 2168 storage. That is, the stored row and column indices can begin at 2169 either one (as in Fortran) or zero. See the users manual for details. 2170 2171 The user MUST specify either the local or global matrix dimensions 2172 (possibly both). 2173 2174 The parallel matrix is partitioned such that the first m0 rows belong to 2175 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2176 to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'. 2177 2178 The DIAGONAL portion of the local submatrix of a processor can be defined 2179 as the submatrix which is obtained by extracting the part corresponding 2180 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2181 first row that belongs to the processor, and r2 is the last row belonging 2182 to this processor. This is a square mxm matrix. The remaining portion 2183 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2184 2185 If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored. 2186 2187 By default, this format uses inodes (identical nodes) when possible. 2188 We search for consecutive rows with the same nonzero structure, thereby 2189 reusing matrix information to achieve increased efficiency. 2190 2191 Options Database Keys: 2192 + -mat_aij_no_inode - Do not use inodes 2193 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2194 - -mat_aij_oneindex - Internally use indexing starting at 1 2195 rather than 0. Note that when calling MatSetValues(), 2196 the user still MUST index entries starting at 0! 2197 2198 2199 Example usage: 2200 2201 Consider the following 8x8 matrix with 34 non-zero values that is 2202 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2203 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2204 as follows: 2205 2206 .vb 2207 1 2 0 | 0 3 0 | 0 4 2208 Proc0 0 5 6 | 7 0 0 | 8 0 2209 9 0 10 | 11 0 0 | 12 0 2210 ------------------------------------- 2211 13 0 14 | 15 16 17 | 0 0 2212 Proc1 0 18 0 | 19 20 21 | 0 0 2213 0 0 0 | 22 23 0 | 24 0 2214 ------------------------------------- 2215 Proc2 25 26 27 | 0 0 28 | 29 0 2216 30 0 0 | 31 32 33 | 0 34 2217 .ve 2218 2219 This can be represented as a collection of submatrices as: 2220 2221 .vb 2222 A B C 2223 D E F 2224 G H I 2225 .ve 2226 2227 Here the submatrices A,B,C are owned by proc0, D,E,F are 2228 owned by proc1, and G,H,I are owned by proc2. 2229 2230 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2231 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2232 The 'M','N' parameters are 8,8, and have the same values on all procs. 2233 2234 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2235 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2236 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2237 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2238 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2239 matrix and [DF] as another SeqAIJ matrix. 2240 2241 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2242 allocated for every row of the local diagonal submatrix, and o_nz 2243 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2244 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2245 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2246 In this case, the values of d_nz,o_nz are: 2247 .vb 2248 proc0 : d_nz = 2, o_nz = 2 2249 proc1 : d_nz = 3, o_nz = 2 2250 proc2 : d_nz = 1, o_nz = 4 2251 .ve 2252 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2253 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2254 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2255 34 values. 2256 2257 When the d_nnz, o_nnz parameters are specified, the storage is specified 2258 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2259 In the above case the values for d_nnz,o_nnz are: 2260 .vb 2261 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2262 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2263 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2264 .ve 2265 Here the space allocated is the sum of all the above values, i.e. 34, and 2266 hence the preallocation is perfect. 2267 2268 Level: intermediate 2269 2270 .keywords: matrix, aij, compressed row, sparse, parallel 2271 2272 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2273 @*/ 2274 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2275 { 2276 int ierr,size; 2277 2278 PetscFunctionBegin; 2279 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2280 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2281 if (size > 1) { 2282 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2283 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2284 } else { 2285 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2286 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2287 } 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2293 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2296 PetscFunctionBegin; 2297 *Ad = a->A; 2298 *Ao = a->B; 2299 *colmap = a->garray; 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2305 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2306 { 2307 int ierr; 2308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2309 2310 PetscFunctionBegin; 2311 if (coloring->ctype == IS_COLORING_LOCAL) { 2312 int *allcolors,*colors,i; 2313 ISColoring ocoloring; 2314 2315 /* set coloring for diagonal portion */ 2316 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2317 2318 /* set coloring for off-diagonal portion */ 2319 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2320 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2321 for (i=0; i<a->B->n; i++) { 2322 colors[i] = allcolors[a->garray[i]]; 2323 } 2324 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2325 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2326 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2327 ierr =
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2328 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2329 int *colors,i,*larray; 2330 ISColoring ocoloring; 2331 2332 /* set coloring for diagonal portion */ 2333 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2334 for (i=0; i<a->A->n; i++) { 2335 larray[i] = i + a->cstart; 2336 } 2337 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2338 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2339 for (i=0; i<a->A->n; i++) { 2340 colors[i] = coloring->colors[larray[i]]; 2341 } 2342 ierr = PetscFree(larray);CHKERRQ(ierr); 2343 ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2344 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2345 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2346 2347 /* set coloring for off-diagonal portion */ 2348 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2349 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2350 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2351 for (i=0; i<a->B->n; i++) { 2352 colors[i] = coloring->colors[larray[i]]; 2353 } 2354 ierr = PetscFree(larray);CHKERRQ(ierr); 2355 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2356 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2357 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2358 } else { 2359 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2360 } 2361 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2367 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2368 { 2369 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2370 int ierr; 2371 2372 PetscFunctionBegin; 2373 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2374 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 #undef __FUNCT__ 2379 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2380 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2381 { 2382 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2383 int ierr; 2384 2385 PetscFunctionBegin; 2386 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2387 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390
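#if 0
/*
   Illustrative sketch (excluded from compilation): how a user might assemble a
   distributed tridiagonal matrix with the routines defined in this file.  The
   function name, the three-point stencil, and the preallocation counts (3
   diagonal and 2 off-diagonal nonzeros per row) are assumptions made for this
   example only; it assumes n >= 2.
*/
static int BuildTridiagonalExample(MPI_Comm comm,int n,Mat *A)
{
  int         ierr,i,rstart,rend,cols[3];
  PetscScalar vals[3] = {-1.0,2.0,-1.0};

  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,n,n,3,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    if (i == 0) {
      /* first global row has entries in columns 0 and 1 only */
      cols[0] = 0; cols[1] = 1;
      ierr = MatSetValues(*A,1,&i,2,cols,vals+1,INSERT_VALUES);CHKERRQ(ierr);
    } else if (i == n-1) {
      /* last global row has entries in columns n-2 and n-1 only */
      cols[0] = n-2; cols[1] = n-1;
      ierr = MatSetValues(*A,1,&i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    } else {
      /* interior rows carry the full three-point stencil */
      cols[0] = i-1; cols[1] = i; cols[2] = i+1;
      ierr = MatSetValues(*A,1,&i,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif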