1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 19 a slightly higher hash table cost; without it it is not scalable (each processor 20 has an order N integer array but is fast to acess. 21 */ 22 #undef __FUNCT__ 23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 24 int CreateColmap_MPIAIJ_Private(Mat mat) 25 { 26 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 27 int n = aij->B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 32 for (i=0; i<n; i++){ 33 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 37 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for (_i=low; _i<high; _i++) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 71 PetscScalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 77 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 78 new_j = (int*)(new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 83 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = PETSC_TRUE; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for (ii=N; ii>=_i; ii--) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for (_i=low; _i<high; _i++) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 145 PetscScalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 151 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 152 new_j = (int*)(new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 157 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = PETSC_TRUE; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for (ii=N; ii>=_i; ii--) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 198 PetscScalar value; 199 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 200 int cstart = aij->cstart,cend = aij->cend,row,col; 201 PetscTruth roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 207 PetscScalar *aa = a->a; 208 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 211 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 212 PetscScalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 PetscScalar *ap; 217 218 PetscFunctionBegin; 219 for (i=0; i<m; i++) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 if (ignorezeroentries && value == 0.0) continue; 231 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ*)B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 if (ignorezeroentries && value == 0.0) continue; 260 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 261 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 262 } 263 } 264 } else { 265 if (!aij->donotstash) { 266 if (roworiented) { 267 if (ignorezeroentries && v[i*n] == 0.0) continue; 268 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 269 } else { 270 if (ignorezeroentries && v[i] == 0.0) continue; 271 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 272 } 273 } 274 } 275 } 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatGetValues_MPIAIJ" 281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 282 { 283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 284 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 285 int cstart = aij->cstart,cend = aij->cend,row,col; 286 287 PetscFunctionBegin; 288 for (i=0; i<m; i++) { 289 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 290 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 291 if (idxm[i] >= rstart && idxm[i] < rend) { 292 row = idxm[i] - rstart; 293 for (j=0; j<n; j++) { 294 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 295 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 296 if (idxn[j] >= cstart && idxn[j] < cend){ 297 col = idxn[j] - cstart; 298 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 299 } else { 300 if (!aij->colmap) { 301 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 302 } 303 #if defined (PETSC_USE_CTABLE) 304 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 305 col --; 306 #else 307 col = aij->colmap[idxn[j]] - 1; 308 #endif 309 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 310 else { 311 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 312 } 313 } 314 } 315 } else { 316 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 317 } 318 } 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 325 { 326 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 327 int ierr,nstash,reallocs; 328 InsertMode addv; 329 330 PetscFunctionBegin; 331 if (aij->donotstash) { 332 PetscFunctionReturn(0); 333 } 334 335 /* make sure all processors are either in INSERTMODE or ADDMODE */ 336 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 337 if (addv == (ADD_VALUES|INSERT_VALUES)) { 338 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 339 } 340 mat->insertmode = addv; /* in case this processor had no cache */ 341 342 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 343 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 344 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 345 PetscFunctionReturn(0); 346 } 347 348 349 #undef __FUNCT__ 350 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 351 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 352 { 353 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. */ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 #if defined(PETSC_HAVE_SUPERLUDIST) 408 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 409 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 410 #endif 411 PetscFunctionReturn(0); 412 } 413 414 #undef __FUNCT__ 415 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 416 int MatZeroEntries_MPIAIJ(Mat A) 417 { 418 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 419 int ierr; 420 421 PetscFunctionBegin; 422 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 423 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatZeroRows_MPIAIJ" 429 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 430 { 431 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 432 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 433 int *procs,*nprocs,j,idx,nsends,*work,row; 434 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 435 int *rvalues,tag = A->tag,count,base,slen,n,*source; 436 int *lens,imdex,*lrows,*values,rstart=l->rstart; 437 MPI_Comm comm = A->comm; 438 MPI_Request *send_waits,*recv_waits; 439 MPI_Status recv_status,*send_status; 440 IS istmp; 441 PetscTruth found; 442 443 PetscFunctionBegin; 444 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 445 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 446 447 /* first count number of contributors to each processor */ 448 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 449 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 450 procs = nprocs + size; 451 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 452 for (i=0; i<N; i++) { 453 idx = rows[i]; 454 found = PETSC_FALSE; 455 for (j=0; j<size; j++) { 456 if (idx >= owners[j] && idx < owners[j+1]) { 457 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 458 } 459 } 460 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 461 } 462 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 463 464 /* inform other processors of number of messages and max length*/ 465 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 466 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 467 nrecvs = work[size+rank]; 468 nmax = work[rank]; 469 ierr = PetscFree(work);CHKERRQ(ierr); 470 471 /* post receives: */ 472 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 474 for (i=0; i<nrecvs; i++) { 475 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 476 } 477 478 /* do sends: 479 1) starts[i] gives the starting index in svalues for stuff going to 480 the ith processor 481 */ 482 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 483 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 484 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 485 starts[0] = 0; 486 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 487 for (i=0; i<N; i++) { 488 svalues[starts[owner[i]]++] = rows[i]; 489 } 490 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 491 492 starts[0] = 0; 493 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 494 count = 0; 495 for (i=0; i<size; i++) { 496 if (procs[i]) { 497 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 498 } 499 } 500 ierr = PetscFree(starts);CHKERRQ(ierr); 501 502 base = owners[rank]; 503 504 /* wait on receives */ 505 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 506 source = lens + nrecvs; 507 count = nrecvs; slen = 0; 508 while (count) { 509 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 510 /* unpack receives into our local space */ 511 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 512 source[imdex] = recv_status.MPI_SOURCE; 513 lens[imdex] = n; 514 slen += n; 515 count--; 516 } 517 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 518 519 /* move the data into the send scatter */ 520 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 521 count = 0; 522 for (i=0; i<nrecvs; i++) { 523 values = rvalues + i*nmax; 524 for (j=0; j<lens[i]; j++) { 525 lrows[count++] = values[j] - base; 526 } 527 } 528 ierr = PetscFree(rvalues);CHKERRQ(ierr); 529 ierr = PetscFree(lens);CHKERRQ(ierr); 530 ierr = PetscFree(owner);CHKERRQ(ierr); 531 ierr = PetscFree(nprocs);CHKERRQ(ierr); 532 533 /* actually zap the local rows */ 534 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 535 PetscLogObjectParent(A,istmp); 536 537 /* 538 Zero the required rows. If the "diagonal block" of the matrix 539 is square and the user wishes to set the diagonal we use seperate 540 code so that MatSetValues() is not called for each diagonal allocating 541 new memory, thus calling lots of mallocs and slowing things down. 542 543 Contributed by: Mathew Knepley 544 */ 545 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 546 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 547 if (diag && (l->A->M == l->A->N)) { 548 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 549 } else if (diag) { 550 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 551 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 552 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 554 } 555 for (i = 0; i < slen; i++) { 556 row = lrows[i] + rstart; 557 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 558 } 559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 561 } else { 562 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 563 } 564 ierr = ISDestroy(istmp);CHKERRQ(ierr); 565 ierr = PetscFree(lrows);CHKERRQ(ierr); 566 567 /* wait on sends */ 568 if (nsends) { 569 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 570 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 571 ierr = PetscFree(send_status);CHKERRQ(ierr); 572 } 573 ierr = PetscFree(send_waits);CHKERRQ(ierr); 574 ierr = PetscFree(svalues);CHKERRQ(ierr); 575 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatMult_MPIAIJ" 581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 582 { 583 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 584 int ierr,nt; 585 586 PetscFunctionBegin; 587 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 588 if (nt != A->n) { 589 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 590 } 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultAdd_MPIAIJ" 600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 607 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 608 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 609 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 610 PetscFunctionReturn(0); 611 } 612 613 #undef __FUNCT__ 614 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 616 { 617 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 618 int ierr; 619 620 PetscFunctionBegin; 621 /* do nondiagonal part */ 622 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 623 /* send it on its way */ 624 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 625 /* do local part */ 626 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 627 /* receive remote parts: note this assumes the values are not actually */ 628 /* inserted in yy until the next line, which is true for my implementation*/ 629 /* but is not perhaps always true. */ 630 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 637 { 638 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 639 int ierr; 640 641 PetscFunctionBegin; 642 /* do nondiagonal part */ 643 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 644 /* send it on its way */ 645 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 646 /* do local part */ 647 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 648 /* receive remote parts: note this assumes the values are not actually */ 649 /* inserted in yy until the next line, which is true for my implementation*/ 650 /* but is not perhaps always true. */ 651 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 /* 656 This only works correctly for square matrices where the subblock A->A is the 657 diagonal block 658 */ 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 662 { 663 int ierr; 664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 665 666 PetscFunctionBegin; 667 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 668 if (a->rstart != a->cstart || a->rend != a->cend) { 669 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 670 } 671 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatScale_MPIAIJ" 677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 678 { 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 int ierr; 681 682 PetscFunctionBegin; 683 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 684 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatDestroy_MPIAIJ" 690 int MatDestroy_MPIAIJ(Mat mat) 691 { 692 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 693 int ierr; 694 695 PetscFunctionBegin; 696 #if defined(PETSC_USE_LOG) 697 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 698 #endif 699 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 700 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 701 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 702 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 703 #if defined (PETSC_USE_CTABLE) 704 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 705 #else 706 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 707 #endif 708 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 709 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 710 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 711 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 712 ierr = PetscFree(aij);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 718 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 722 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 723 PetscTruth isdraw,isascii,flg; 724 PetscViewer sviewer; 725 PetscViewerFormat format; 726 727 PetscFunctionBegin; 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 730 if (isascii) { 731 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 732 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 733 MatInfo info; 734 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 735 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 736 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 737 if (flg) { 738 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 739 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 740 } else { 741 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 742 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 743 } 744 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 745 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 746 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 748 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 749 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } else if (format == PETSC_VIEWER_ASCII_INFO) { 752 PetscFunctionReturn(0); 753 } 754 } else if (isdraw) { 755 PetscDraw draw; 756 PetscTruth isnull; 757 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 758 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 759 } 760 761 if (size == 1) { 762 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 763 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 764 } else { 765 /* assemble the entire matrix onto first processor. */ 766 Mat A; 767 Mat_SeqAIJ *Aloc; 768 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 769 PetscScalar *a; 770 771 if (!rank) { 772 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 773 } else { 774 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 775 } 776 PetscLogObjectParent(mat,A); 777 778 /* copy over the A part */ 779 Aloc = (Mat_SeqAIJ*)aij->A->data; 780 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 781 row = aij->rstart; 782 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 783 for (i=0; i<m; i++) { 784 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 785 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 786 } 787 aj = Aloc->j; 788 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 789 790 /* copy over the B part */ 791 Aloc = (Mat_SeqAIJ*)aij->B->data; 792 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 793 row = aij->rstart; 794 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 795 ct = cols; 796 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 797 for (i=0; i<m; i++) { 798 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 799 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 800 } 801 ierr = PetscFree(ct);CHKERRQ(ierr); 802 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 803 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 804 /* 805 Everyone has to call to draw the matrix since the graphics waits are 806 synchronized across all processors that share the PetscDraw object 807 */ 808 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 809 if (!rank) { 810 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 811 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 812 } 813 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 814 ierr = MatDestroy(A);CHKERRQ(ierr); 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatView_MPIAIJ" 821 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 822 { 823 int ierr; 824 PetscTruth isascii,isdraw,issocket,isbinary; 825 826 PetscFunctionBegin; 827 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 828 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 829 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 830 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 831 if (isascii || isdraw || isbinary || issocket) { 832 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 833 } else { 834 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatRelax_MPIAIJ" 843 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 846 int ierr; 847 Vec bb1; 848 PetscScalar mone=-1.0; 849 850 PetscFunctionBegin; 851 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 852 853 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 854 855 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 856 if (flag & SOR_ZERO_INITIAL_GUESS) { 857 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 858 its--; 859 } 860 861 while (its--) { 862 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 863 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 864 865 /* update rhs: bb1 = bb - B*x */ 866 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 867 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 868 869 /* local sweep */ 870 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx); 871 CHKERRQ(ierr); 872 } 873 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 874 if (flag & SOR_ZERO_INITIAL_GUESS) { 875 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 876 its--; 877 } 878 while (its--) { 879 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 880 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 881 882 /* update rhs: bb1 = bb - B*x */ 883 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 884 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 885 886 /* local sweep */ 887 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 888 CHKERRQ(ierr); 889 } 890 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 891 if (flag & SOR_ZERO_INITIAL_GUESS) { 892 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 893 its--; 894 } 895 while (its--) { 896 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 897 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 898 899 /* update rhs: bb1 = bb - B*x */ 900 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 901 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 902 903 /* local sweep */ 904 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 905 CHKERRQ(ierr); 906 } 907 } else { 908 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 909 } 910 911 ierr = VecDestroy(bb1);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "MatGetInfo_MPIAIJ" 917 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 918 { 919 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 920 Mat A = mat->A,B = mat->B; 921 int ierr; 922 PetscReal isend[5],irecv[5]; 923 924 PetscFunctionBegin; 925 info->block_size = 1.0; 926 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 927 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 928 isend[3] = info->memory; isend[4] = info->mallocs; 929 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 930 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 931 isend[3] += info->memory; isend[4] += info->mallocs; 932 if (flag == MAT_LOCAL) { 933 info->nz_used = isend[0]; 934 info->nz_allocated = isend[1]; 935 info->nz_unneeded = isend[2]; 936 info->memory = isend[3]; 937 info->mallocs = isend[4]; 938 } else if (flag == MAT_GLOBAL_MAX) { 939 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 940 info->nz_used = irecv[0]; 941 info->nz_allocated = irecv[1]; 942 info->nz_unneeded = irecv[2]; 943 info->memory = irecv[3]; 944 info->mallocs = irecv[4]; 945 } else if (flag == MAT_GLOBAL_SUM) { 946 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 947 info->nz_used = irecv[0]; 948 info->nz_allocated = irecv[1]; 949 info->nz_unneeded = irecv[2]; 950 info->memory = irecv[3]; 951 info->mallocs = irecv[4]; 952 } 953 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 954 info->fill_ratio_needed = 0; 955 info->factor_mallocs = 0; 956 info->rows_global = (double)matin->M; 957 info->columns_global = (double)matin->N; 958 info->rows_local = (double)matin->m; 959 info->columns_local = (double)matin->N; 960 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatSetOption_MPIAIJ" 966 int MatSetOption_MPIAIJ(Mat A,MatOption op) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 int ierr; 970 971 PetscFunctionBegin; 972 switch (op) { 973 case MAT_NO_NEW_NONZERO_LOCATIONS: 974 case MAT_YES_NEW_NONZERO_LOCATIONS: 975 case MAT_COLUMNS_UNSORTED: 976 case MAT_COLUMNS_SORTED: 977 case MAT_NEW_NONZERO_ALLOCATION_ERR: 978 case MAT_KEEP_ZEROED_ROWS: 979 case MAT_NEW_NONZERO_LOCATION_ERR: 980 case MAT_USE_INODES: 981 case MAT_DO_NOT_USE_INODES: 982 case MAT_IGNORE_ZERO_ENTRIES: 983 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 984 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 985 break; 986 case MAT_ROW_ORIENTED: 987 a->roworiented = PETSC_TRUE; 988 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 989 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 990 break; 991 case MAT_ROWS_SORTED: 992 case MAT_ROWS_UNSORTED: 993 case MAT_YES_NEW_DIAGONALS: 994 case MAT_USE_SINGLE_PRECISION_SOLVES: 995 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 996 break; 997 case MAT_COLUMN_ORIENTED: 998 a->roworiented = PETSC_FALSE; 999 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1000 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1001 break; 1002 case MAT_IGNORE_OFF_PROC_ENTRIES: 1003 a->donotstash = PETSC_TRUE; 1004 break; 1005 case MAT_NO_NEW_DIAGONALS: 1006 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1007 break; 1008 default: 1009 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1010 break; 1011 } 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatGetRow_MPIAIJ" 1017 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1018 { 1019 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1020 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1021 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1022 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1023 int *cmap,*idx_p; 1024 1025 PetscFunctionBegin; 1026 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1027 mat->getrowactive = PETSC_TRUE; 1028 1029 if (!mat->rowvalues && (idx || v)) { 1030 /* 1031 allocate enough space to hold information from the longest row. 1032 */ 1033 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1034 int max = 1,tmp; 1035 for (i=0; i<matin->m; i++) { 1036 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1037 if (max < tmp) { max = tmp; } 1038 } 1039 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1040 mat->rowindices = (int*)(mat->rowvalues + max); 1041 } 1042 1043 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1044 lrow = row - rstart; 1045 1046 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1047 if (!v) {pvA = 0; pvB = 0;} 1048 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1049 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1050 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1051 nztot = nzA + nzB; 1052 1053 cmap = mat->garray; 1054 if (v || idx) { 1055 if (nztot) { 1056 /* Sort by increasing column numbers, assuming A and B already sorted */ 1057 int imark = -1; 1058 if (v) { 1059 *v = v_p = mat->rowvalues; 1060 for (i=0; i<nzB; i++) { 1061 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1062 else break; 1063 } 1064 imark = i; 1065 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1066 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1067 } 1068 if (idx) { 1069 *idx = idx_p = mat->rowindices; 1070 if (imark > -1) { 1071 for (i=0; i<imark; i++) { 1072 idx_p[i] = cmap[cworkB[i]]; 1073 } 1074 } else { 1075 for (i=0; i<nzB; i++) { 1076 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1077 else break; 1078 } 1079 imark = i; 1080 } 1081 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1082 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1083 } 1084 } else { 1085 if (idx) *idx = 0; 1086 if (v) *v = 0; 1087 } 1088 } 1089 *nz = nztot; 1090 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1091 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1097 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1098 { 1099 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1100 1101 PetscFunctionBegin; 1102 if (aij->getrowactive == PETSC_FALSE) { 1103 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1104 } 1105 aij->getrowactive = PETSC_FALSE; 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatNorm_MPIAIJ" 1111 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1112 { 1113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1114 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1115 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1116 PetscReal sum = 0.0; 1117 PetscScalar *v; 1118 1119 PetscFunctionBegin; 1120 if (aij->size == 1) { 1121 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1122 } else { 1123 if (type == NORM_FROBENIUS) { 1124 v = amat->a; 1125 for (i=0; i<amat->nz; i++) { 1126 #if defined(PETSC_USE_COMPLEX) 1127 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1128 #else 1129 sum += (*v)*(*v); v++; 1130 #endif 1131 } 1132 v = bmat->a; 1133 for (i=0; i<bmat->nz; i++) { 1134 #if defined(PETSC_USE_COMPLEX) 1135 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1136 #else 1137 sum += (*v)*(*v); v++; 1138 #endif 1139 } 1140 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1141 *norm = sqrt(*norm); 1142 } else if (type == NORM_1) { /* max column norm */ 1143 PetscReal *tmp,*tmp2; 1144 int *jj,*garray = aij->garray; 1145 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1146 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1147 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1148 *norm = 0.0; 1149 v = amat->a; jj = amat->j; 1150 for (j=0; j<amat->nz; j++) { 1151 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1152 } 1153 v = bmat->a; jj = bmat->j; 1154 for (j=0; j<bmat->nz; j++) { 1155 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1156 } 1157 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1158 for (j=0; j<mat->N; j++) { 1159 if (tmp2[j] > *norm) *norm = tmp2[j]; 1160 } 1161 ierr = PetscFree(tmp);CHKERRQ(ierr); 1162 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1163 } else if (type == NORM_INFINITY) { /* max row norm */ 1164 PetscReal ntemp = 0.0; 1165 for (j=0; j<aij->A->m; j++) { 1166 v = amat->a + amat->i[j] + shift; 1167 sum = 0.0; 1168 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1169 sum += PetscAbsScalar(*v); v++; 1170 } 1171 v = bmat->a + bmat->i[j] + shift; 1172 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1173 sum += PetscAbsScalar(*v); v++; 1174 } 1175 if (sum > ntemp) ntemp = sum; 1176 } 1177 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1178 } else { 1179 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1180 } 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatTranspose_MPIAIJ" 1187 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1188 { 1189 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1190 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1191 int ierr,shift = Aloc->indexshift; 1192 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1193 Mat B; 1194 PetscScalar *array; 1195 1196 PetscFunctionBegin; 1197 if (!matout && M != N) { 1198 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1199 } 1200 1201 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1202 1203 /* copy over the A part */ 1204 Aloc = (Mat_SeqAIJ*)a->A->data; 1205 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1206 row = a->rstart; 1207 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1208 for (i=0; i<m; i++) { 1209 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1210 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1211 } 1212 aj = Aloc->j; 1213 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1214 1215 /* copy over the B part */ 1216 Aloc = (Mat_SeqAIJ*)a->B->data; 1217 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1218 row = a->rstart; 1219 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1220 ct = cols; 1221 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1222 for (i=0; i<m; i++) { 1223 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1224 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1225 } 1226 ierr = PetscFree(ct);CHKERRQ(ierr); 1227 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1228 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1229 if (matout) { 1230 *matout = B; 1231 } else { 1232 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1233 } 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1239 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1240 { 1241 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1242 Mat a = aij->A,b = aij->B; 1243 int ierr,s1,s2,s3; 1244 1245 PetscFunctionBegin; 1246 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1247 if (rr) { 1248 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1249 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1250 /* Overlap communication with computation. */ 1251 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1252 } 1253 if (ll) { 1254 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1255 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1256 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1257 } 1258 /* scale the diagonal block */ 1259 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1260 1261 if (rr) { 1262 /* Do a scatter end and then right scale the off-diagonal block */ 1263 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1264 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1265 } 1266 1267 PetscFunctionReturn(0); 1268 } 1269 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1273 int MatPrintHelp_MPIAIJ(Mat A) 1274 { 1275 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1276 int ierr; 1277 1278 PetscFunctionBegin; 1279 if (!a->rank) { 1280 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1281 } 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1287 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1288 { 1289 PetscFunctionBegin; 1290 *bs = 1; 1291 PetscFunctionReturn(0); 1292 } 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1295 int MatSetUnfactored_MPIAIJ(Mat A) 1296 { 1297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1298 int ierr; 1299 1300 PetscFunctionBegin; 1301 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatEqual_MPIAIJ" 1307 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1308 { 1309 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1310 Mat a,b,c,d; 1311 PetscTruth flg; 1312 int ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1316 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1317 a = matA->A; b = matA->B; 1318 c = matB->A; d = matB->B; 1319 1320 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1321 if (flg == PETSC_TRUE) { 1322 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1323 } 1324 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatCopy_MPIAIJ" 1330 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1331 { 1332 int ierr; 1333 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1334 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1335 PetscTruth flg; 1336 1337 PetscFunctionBegin; 1338 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1339 if (str != SAME_NONZERO_PATTERN || !flg) { 1340 /* because of the column compression in the off-processor part of the matrix a->B, 1341 the number of columns in a->B and b->B may be different, hence we cannot call 1342 the MatCopy() directly on the two parts. If need be, we can provide a more 1343 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1344 then copying the submatrices */ 1345 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1346 } else { 1347 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1348 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1355 int MatSetUpPreallocation_MPIAIJ(Mat A) 1356 { 1357 int ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1365 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1366 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1367 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1368 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1369 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1370 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1371 #endif 1372 1373 /* -------------------------------------------------------------------*/ 1374 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1375 MatGetRow_MPIAIJ, 1376 MatRestoreRow_MPIAIJ, 1377 MatMult_MPIAIJ, 1378 MatMultAdd_MPIAIJ, 1379 MatMultTranspose_MPIAIJ, 1380 MatMultTransposeAdd_MPIAIJ, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 MatRelax_MPIAIJ, 1388 MatTranspose_MPIAIJ, 1389 MatGetInfo_MPIAIJ, 1390 MatEqual_MPIAIJ, 1391 MatGetDiagonal_MPIAIJ, 1392 MatDiagonalScale_MPIAIJ, 1393 MatNorm_MPIAIJ, 1394 MatAssemblyBegin_MPIAIJ, 1395 MatAssemblyEnd_MPIAIJ, 1396 0, 1397 MatSetOption_MPIAIJ, 1398 MatZeroEntries_MPIAIJ, 1399 MatZeroRows_MPIAIJ, 1400 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1401 MatLUFactorSymbolic_MPIAIJ_TFS, 1402 #else 1403 0, 1404 #endif 1405 0, 1406 0, 1407 0, 1408 MatSetUpPreallocation_MPIAIJ, 1409 0, 1410 0, 1411 0, 1412 0, 1413 MatDuplicate_MPIAIJ, 1414 0, 1415 0, 1416 0, 1417 0, 1418 0, 1419 MatGetSubMatrices_MPIAIJ, 1420 MatIncreaseOverlap_MPIAIJ, 1421 MatGetValues_MPIAIJ, 1422 MatCopy_MPIAIJ, 1423 MatPrintHelp_MPIAIJ, 1424 MatScale_MPIAIJ, 1425 0, 1426 0, 1427 0, 1428 MatGetBlockSize_MPIAIJ, 1429 0, 1430 0, 1431 0, 1432 0, 1433 MatFDColoringCreate_MPIAIJ, 1434 0, 1435 MatSetUnfactored_MPIAIJ, 1436 0, 1437 0, 1438 MatGetSubMatrix_MPIAIJ, 1439 MatDestroy_MPIAIJ, 1440 MatView_MPIAIJ, 1441 MatGetPetscMaps_Petsc, 1442 0, 1443 0, 1444 0, 1445 0, 1446 0, 1447 0, 1448 0, 1449 0, 1450 MatSetColoring_MPIAIJ, 1451 MatSetValuesAdic_MPIAIJ, 1452 MatSetValuesAdifor_MPIAIJ 1453 }; 1454 1455 /* ----------------------------------------------------------------------------------------*/ 1456 1457 EXTERN_C_BEGIN 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1460 int MatStoreValues_MPIAIJ(Mat mat) 1461 { 1462 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1463 int ierr; 1464 1465 PetscFunctionBegin; 1466 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1467 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 EXTERN_C_END 1471 1472 EXTERN_C_BEGIN 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1475 int MatRetrieveValues_MPIAIJ(Mat mat) 1476 { 1477 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1478 int ierr; 1479 1480 PetscFunctionBegin; 1481 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1482 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 EXTERN_C_END 1486 1487 #include "petscpc.h" 1488 EXTERN_C_BEGIN 1489 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1490 EXTERN_C_END 1491 1492 EXTERN_C_BEGIN 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatCreate_MPIAIJ" 1495 int MatCreate_MPIAIJ(Mat B) 1496 { 1497 Mat_MPIAIJ *b; 1498 int ierr,i,size; 1499 #if defined(PETSC_HAVE_SUPERLUDIST) 1500 PetscTruth flg; 1501 #endif 1502 1503 PetscFunctionBegin; 1504 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1505 1506 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1507 B->data = (void*)b; 1508 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1509 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1510 B->factor = 0; 1511 B->assembled = PETSC_FALSE; 1512 B->mapping = 0; 1513 1514 B->insertmode = NOT_SET_VALUES; 1515 b->size = size; 1516 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1517 1518 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1519 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1520 1521 /* the information in the maps duplicates the information computed below, eventually 1522 we should remove the duplicate information that is not contained in the maps */ 1523 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1524 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1525 1526 /* build local table of row and column ownerships */ 1527 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1528 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1529 b->cowners = b->rowners + b->size + 2; 1530 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1531 b->rowners[0] = 0; 1532 for (i=2; i<=b->size; i++) { 1533 b->rowners[i] += b->rowners[i-1]; 1534 } 1535 b->rstart = b->rowners[b->rank]; 1536 b->rend = b->rowners[b->rank+1]; 1537 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1538 b->cowners[0] = 0; 1539 for (i=2; i<=b->size; i++) { 1540 b->cowners[i] += b->cowners[i-1]; 1541 } 1542 b->cstart = b->cowners[b->rank]; 1543 b->cend = b->cowners[b->rank+1]; 1544 1545 /* build cache for off array entries formed */ 1546 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1547 b->donotstash = PETSC_FALSE; 1548 b->colmap = 0; 1549 b->garray = 0; 1550 b->roworiented = PETSC_TRUE; 1551 1552 /* stuff used for matrix vector multiply */ 1553 b->lvec = PETSC_NULL; 1554 b->Mvctx = PETSC_NULL; 1555 1556 /* stuff for MatGetRow() */ 1557 b->rowindices = 0; 1558 b->rowvalues = 0; 1559 b->getrowactive = PETSC_FALSE; 1560 1561 /* #if defined(PETSC_HAVE_SUPERLUDIST) 1562 1563 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr); 1564 if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); } 1565 #endif */ 1566 1567 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1568 "MatStoreValues_MPIAIJ", 1569 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1570 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1571 "MatRetrieveValues_MPIAIJ", 1572 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1573 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1574 "MatGetDiagonalBlock_MPIAIJ", 1575 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1576 1577 PetscFunctionReturn(0); 1578 } 1579 EXTERN_C_END 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1583 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1584 { 1585 Mat mat; 1586 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1587 int ierr; 1588 1589 PetscFunctionBegin; 1590 *newmat = 0; 1591 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1592 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1593 a = (Mat_MPIAIJ*)mat->data; 1594 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1595 mat->factor = matin->factor; 1596 mat->assembled = PETSC_TRUE; 1597 mat->insertmode = NOT_SET_VALUES; 1598 mat->preallocated = PETSC_TRUE; 1599 1600 a->rstart = oldmat->rstart; 1601 a->rend = oldmat->rend; 1602 a->cstart = oldmat->cstart; 1603 a->cend = oldmat->cend; 1604 a->size = oldmat->size; 1605 a->rank = oldmat->rank; 1606 a->donotstash = oldmat->donotstash; 1607 a->roworiented = oldmat->roworiented; 1608 a->rowindices = 0; 1609 a->rowvalues = 0; 1610 a->getrowactive = PETSC_FALSE; 1611 1612 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1613 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1614 if (oldmat->colmap) { 1615 #if defined (PETSC_USE_CTABLE) 1616 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1617 #else 1618 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1619 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1620 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1621 #endif 1622 } else a->colmap = 0; 1623 if (oldmat->garray) { 1624 int len; 1625 len = oldmat->B->n; 1626 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1627 PetscLogObjectMemory(mat,len*sizeof(int)); 1628 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1629 } else a->garray = 0; 1630 1631 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1632 PetscLogObjectParent(mat,a->lvec); 1633 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1634 PetscLogObjectParent(mat,a->Mvctx); 1635 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1636 PetscLogObjectParent(mat,a->A); 1637 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1638 PetscLogObjectParent(mat,a->B); 1639 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1640 *newmat = mat; 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #include "petscsys.h" 1645 1646 EXTERN_C_BEGIN 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatLoad_MPIAIJ" 1649 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1650 { 1651 Mat A; 1652 PetscScalar *vals,*svals; 1653 MPI_Comm comm = ((PetscObject)viewer)->comm; 1654 MPI_Status status; 1655 int i,nz,ierr,j,rstart,rend,fd; 1656 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1657 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1658 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1659 1660 PetscFunctionBegin; 1661 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1662 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1663 if (!rank) { 1664 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1665 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1666 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1667 if (header[3] < 0) { 1668 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1669 } 1670 } 1671 1672 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1673 M = header[1]; N = header[2]; 1674 /* determine ownership of all rows */ 1675 m = M/size + ((M % size) > rank); 1676 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1677 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1678 rowners[0] = 0; 1679 for (i=2; i<=size; i++) { 1680 rowners[i] += rowners[i-1]; 1681 } 1682 rstart = rowners[rank]; 1683 rend = rowners[rank+1]; 1684 1685 /* distribute row lengths to all processors */ 1686 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1687 offlens = ourlens + (rend-rstart); 1688 if (!rank) { 1689 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1690 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1691 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1692 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1693 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1694 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1695 } else { 1696 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1697 } 1698 1699 if (!rank) { 1700 /* calculate the number of nonzeros on each processor */ 1701 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1702 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1703 for (i=0; i<size; i++) { 1704 for (j=rowners[i]; j< rowners[i+1]; j++) { 1705 procsnz[i] += rowlengths[j]; 1706 } 1707 } 1708 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1709 1710 /* determine max buffer needed and allocate it */ 1711 maxnz = 0; 1712 for (i=0; i<size; i++) { 1713 maxnz = PetscMax(maxnz,procsnz[i]); 1714 } 1715 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1716 1717 /* read in my part of the matrix column indices */ 1718 nz = procsnz[0]; 1719 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1720 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1721 1722 /* read in every one elses and ship off */ 1723 for (i=1; i<size; i++) { 1724 nz = procsnz[i]; 1725 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1726 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1727 } 1728 ierr = PetscFree(cols);CHKERRQ(ierr); 1729 } else { 1730 /* determine buffer space needed for message */ 1731 nz = 0; 1732 for (i=0; i<m; i++) { 1733 nz += ourlens[i]; 1734 } 1735 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1736 1737 /* receive message of column indices*/ 1738 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1739 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1740 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1741 } 1742 1743 /* determine column ownership if matrix is not square */ 1744 if (N != M) { 1745 n = N/size + ((N % size) > rank); 1746 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1747 cstart = cend - n; 1748 } else { 1749 cstart = rstart; 1750 cend = rend; 1751 n = cend - cstart; 1752 } 1753 1754 /* loop over local rows, determining number of off diagonal entries */ 1755 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1756 jj = 0; 1757 for (i=0; i<m; i++) { 1758 for (j=0; j<ourlens[i]; j++) { 1759 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1760 jj++; 1761 } 1762 } 1763 1764 /* create our matrix */ 1765 for (i=0; i<m; i++) { 1766 ourlens[i] -= offlens[i]; 1767 } 1768 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1769 A = *newmat; 1770 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1771 for (i=0; i<m; i++) { 1772 ourlens[i] += offlens[i]; 1773 } 1774 1775 if (!rank) { 1776 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1777 1778 /* read in my part of the matrix numerical values */ 1779 nz = procsnz[0]; 1780 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1781 1782 /* insert into matrix */ 1783 jj = rstart; 1784 smycols = mycols; 1785 svals = vals; 1786 for (i=0; i<m; i++) { 1787 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1788 smycols += ourlens[i]; 1789 svals += ourlens[i]; 1790 jj++; 1791 } 1792 1793 /* read in other processors and ship out */ 1794 for (i=1; i<size; i++) { 1795 nz = procsnz[i]; 1796 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1797 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1798 } 1799 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1800 } else { 1801 /* receive numeric values */ 1802 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1803 1804 /* receive message of values*/ 1805 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1806 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1807 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1808 1809 /* insert into matrix */ 1810 jj = rstart; 1811 smycols = mycols; 1812 svals = vals; 1813 for (i=0; i<m; i++) { 1814 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1815 smycols += ourlens[i]; 1816 svals += ourlens[i]; 1817 jj++; 1818 } 1819 } 1820 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1821 ierr = PetscFree(vals);CHKERRQ(ierr); 1822 ierr = PetscFree(mycols);CHKERRQ(ierr); 1823 ierr = PetscFree(rowners);CHKERRQ(ierr); 1824 1825 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1826 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1827 PetscFunctionReturn(0); 1828 } 1829 EXTERN_C_END 1830 1831 #undef __FUNCT__ 1832 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 1833 /* 1834 Not great since it makes two copies of the submatrix, first an SeqAIJ 1835 in local and then by concatenating the local matrices the end result. 1836 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1837 */ 1838 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1839 { 1840 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1841 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1842 Mat *local,M,Mreuse; 1843 PetscScalar *vwork,*aa; 1844 MPI_Comm comm = mat->comm; 1845 Mat_SeqAIJ *aij; 1846 1847 1848 PetscFunctionBegin; 1849 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1850 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1851 1852 if (call == MAT_REUSE_MATRIX) { 1853 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1854 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1855 local = &Mreuse; 1856 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1857 } else { 1858 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1859 Mreuse = *local; 1860 ierr = PetscFree(local);CHKERRQ(ierr); 1861 } 1862 1863 /* 1864 m - number of local rows 1865 n - number of columns (same on all processors) 1866 rstart - first row in new global matrix generated 1867 */ 1868 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1869 if (call == MAT_INITIAL_MATRIX) { 1870 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1871 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1872 ii = aij->i; 1873 jj = aij->j; 1874 1875 /* 1876 Determine the number of non-zeros in the diagonal and off-diagonal 1877 portions of the matrix in order to do correct preallocation 1878 */ 1879 1880 /* first get start and end of "diagonal" columns */ 1881 if (csize == PETSC_DECIDE) { 1882 nlocal = n/size + ((n % size) > rank); 1883 } else { 1884 nlocal = csize; 1885 } 1886 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1887 rstart = rend - nlocal; 1888 if (rank == size - 1 && rend != n) { 1889 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1890 } 1891 1892 /* next, compute all the lengths */ 1893 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1894 olens = dlens + m; 1895 for (i=0; i<m; i++) { 1896 jend = ii[i+1] - ii[i]; 1897 olen = 0; 1898 dlen = 0; 1899 for (j=0; j<jend; j++) { 1900 if (*jj < rstart || *jj >= rend) olen++; 1901 else dlen++; 1902 jj++; 1903 } 1904 olens[i] = olen; 1905 dlens[i] = dlen; 1906 } 1907 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1908 ierr = PetscFree(dlens);CHKERRQ(ierr); 1909 } else { 1910 int ml,nl; 1911 1912 M = *newmat; 1913 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1914 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1915 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1916 /* 1917 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1918 rather than the slower MatSetValues(). 1919 */ 1920 M->was_assembled = PETSC_TRUE; 1921 M->assembled = PETSC_FALSE; 1922 } 1923 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1924 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1925 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1926 ii = aij->i; 1927 jj = aij->j; 1928 aa = aij->a; 1929 for (i=0; i<m; i++) { 1930 row = rstart + i; 1931 nz = ii[i+1] - ii[i]; 1932 cwork = jj; jj += nz; 1933 vwork = aa; aa += nz; 1934 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1935 } 1936 1937 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1938 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1939 *newmat = M; 1940 1941 /* save submatrix used in processor for next request */ 1942 if (call == MAT_INITIAL_MATRIX) { 1943 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1944 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1945 } 1946 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatMPIAIJSetPreallocation" 1952 /*@C 1953 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 1954 (the default parallel PETSc format). For good matrix assembly performance 1955 the user should preallocate the matrix storage by setting the parameters 1956 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1957 performance can be increased by more than a factor of 50. 1958 1959 Collective on MPI_Comm 1960 1961 Input Parameters: 1962 + A - the matrix 1963 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1964 (same value is used for all local rows) 1965 . d_nnz - array containing the number of nonzeros in the various rows of the 1966 DIAGONAL portion of the local submatrix (possibly different for each row) 1967 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1968 The size of this array is equal to the number of local rows, i.e 'm'. 1969 You must leave room for the diagonal entry even if it is zero. 1970 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1971 submatrix (same value is used for all local rows). 1972 - o_nnz - array containing the number of nonzeros in the various rows of the 1973 OFF-DIAGONAL portion of the local submatrix (possibly different for 1974 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1975 structure. The size of this array is equal to the number 1976 of local rows, i.e 'm'. 1977 1978 The AIJ format (also called the Yale sparse matrix format or 1979 compressed row storage), is fully compatible with standard Fortran 77 1980 storage. That is, the stored row and column indices can begin at 1981 either one (as in Fortran) or zero. See the users manual for details. 1982 1983 The user MUST specify either the local or global matrix dimensions 1984 (possibly both). 1985 1986 The parallel matrix is partitioned such that the first m0 rows belong to 1987 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1988 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1989 1990 The DIAGONAL portion of the local submatrix of a processor can be defined 1991 as the submatrix which is obtained by extraction the part corresponding 1992 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1993 first row that belongs to the processor, and r2 is the last row belonging 1994 to the this processor. This is a square mxm matrix. The remaining portion 1995 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1996 1997 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1998 1999 By default, this format uses inodes (identical nodes) when possible. 2000 We search for consecutive rows with the same nonzero structure, thereby 2001 reusing matrix information to achieve increased efficiency. 2002 2003 Options Database Keys: 2004 + -mat_aij_no_inode - Do not use inodes 2005 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2006 - -mat_aij_oneindex - Internally use indexing starting at 1 2007 rather than 0. Note that when calling MatSetValues(), 2008 the user still MUST index entries starting at 0! 2009 2010 Example usage: 2011 2012 Consider the following 8x8 matrix with 34 non-zero values, that is 2013 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2014 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2015 as follows: 2016 2017 .vb 2018 1 2 0 | 0 3 0 | 0 4 2019 Proc0 0 5 6 | 7 0 0 | 8 0 2020 9 0 10 | 11 0 0 | 12 0 2021 ------------------------------------- 2022 13 0 14 | 15 16 17 | 0 0 2023 Proc1 0 18 0 | 19 20 21 | 0 0 2024 0 0 0 | 22 23 0 | 24 0 2025 ------------------------------------- 2026 Proc2 25 26 27 | 0 0 28 | 29 0 2027 30 0 0 | 31 32 33 | 0 34 2028 .ve 2029 2030 This can be represented as a collection of submatrices as: 2031 2032 .vb 2033 A B C 2034 D E F 2035 G H I 2036 .ve 2037 2038 Where the submatrices A,B,C are owned by proc0, D,E,F are 2039 owned by proc1, G,H,I are owned by proc2. 2040 2041 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2042 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2043 The 'M','N' parameters are 8,8, and have the same values on all procs. 2044 2045 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2046 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2047 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2048 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2049 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2050 matrix, ans [DF] as another SeqAIJ matrix. 2051 2052 When d_nz, o_nz parameters are specified, d_nz storage elements are 2053 allocated for every row of the local diagonal submatrix, and o_nz 2054 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2055 One way to choose d_nz and o_nz is to use the max nonzerors per local 2056 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2057 In this case, the values of d_nz,o_nz are: 2058 .vb 2059 proc0 : dnz = 2, o_nz = 2 2060 proc1 : dnz = 3, o_nz = 2 2061 proc2 : dnz = 1, o_nz = 4 2062 .ve 2063 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2064 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2065 for proc3. i.e we are using 12+15+10=37 storage locations to store 2066 34 values. 2067 2068 When d_nnz, o_nnz parameters are specified, the storage is specified 2069 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2070 In the above case the values for d_nnz,o_nnz are: 2071 .vb 2072 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2073 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2074 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2075 .ve 2076 Here the space allocated is sum of all the above values i.e 34, and 2077 hence pre-allocation is perfect. 2078 2079 Level: intermediate 2080 2081 .keywords: matrix, aij, compressed row, sparse, parallel 2082 2083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2084 @*/ 2085 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2086 { 2087 Mat_MPIAIJ *b; 2088 int ierr,i; 2089 PetscTruth flg2; 2090 2091 PetscFunctionBegin; 2092 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2093 if (!flg2) PetscFunctionReturn(0); 2094 B->preallocated = PETSC_TRUE; 2095 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2096 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2097 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2098 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2099 if (d_nnz) { 2100 for (i=0; i<B->m; i++) { 2101 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2102 } 2103 } 2104 if (o_nnz) { 2105 for (i=0; i<B->m; i++) { 2106 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2107 } 2108 } 2109 b = (Mat_MPIAIJ*)B->data; 2110 2111 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2112 PetscLogObjectParent(B,b->A); 2113 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2114 PetscLogObjectParent(B,b->B); 2115 2116 PetscFunctionReturn(0); 2117 } 2118 2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatCreateMPIAIJ" 2121 /*@C 2122 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2123 (the default parallel PETSc format). For good matrix assembly performance 2124 the user should preallocate the matrix storage by setting the parameters 2125 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2126 performance can be increased by more than a factor of 50. 2127 2128 Collective on MPI_Comm 2129 2130 Input Parameters: 2131 + comm - MPI communicator 2132 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2133 This value should be the same as the local size used in creating the 2134 y vector for the matrix-vector product y = Ax. 2135 . n - This value should be the same as the local size used in creating the 2136 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2137 calculated if N is given) For square matrices n is almost always m. 2138 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2139 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2140 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2141 (same value is used for all local rows) 2142 . d_nnz - array containing the number of nonzeros in the various rows of the 2143 DIAGONAL portion of the local submatrix (possibly different for each row) 2144 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2145 The size of this array is equal to the number of local rows, i.e 'm'. 2146 You must leave room for the diagonal entry even if it is zero. 2147 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2148 submatrix (same value is used for all local rows). 2149 - o_nnz - array containing the number of nonzeros in the various rows of the 2150 OFF-DIAGONAL portion of the local submatrix (possibly different for 2151 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2152 structure. The size of this array is equal to the number 2153 of local rows, i.e 'm'. 2154 2155 Output Parameter: 2156 . A - the matrix 2157 2158 Notes: 2159 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2160 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2161 storage requirements for this matrix. 2162 2163 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2164 processor than it must be used on all processors that share the object for 2165 that argument. 2166 2167 The AIJ format (also called the Yale sparse matrix format or 2168 compressed row storage), is fully compatible with standard Fortran 77 2169 storage. That is, the stored row and column indices can begin at 2170 either one (as in Fortran) or zero. See the users manual for details. 2171 2172 The user MUST specify either the local or global matrix dimensions 2173 (possibly both). 2174 2175 The parallel matrix is partitioned such that the first m0 rows belong to 2176 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2177 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2178 2179 The DIAGONAL portion of the local submatrix of a processor can be defined 2180 as the submatrix which is obtained by extraction the part corresponding 2181 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2182 first row that belongs to the processor, and r2 is the last row belonging 2183 to the this processor. This is a square mxm matrix. The remaining portion 2184 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2185 2186 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2187 2188 By default, this format uses inodes (identical nodes) when possible. 2189 We search for consecutive rows with the same nonzero structure, thereby 2190 reusing matrix information to achieve increased efficiency. 2191 2192 Options Database Keys: 2193 + -mat_aij_no_inode - Do not use inodes 2194 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2195 - -mat_aij_oneindex - Internally use indexing starting at 1 2196 rather than 0. Note that when calling MatSetValues(), 2197 the user still MUST index entries starting at 0! 2198 2199 2200 Example usage: 2201 2202 Consider the following 8x8 matrix with 34 non-zero values, that is 2203 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2204 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2205 as follows: 2206 2207 .vb 2208 1 2 0 | 0 3 0 | 0 4 2209 Proc0 0 5 6 | 7 0 0 | 8 0 2210 9 0 10 | 11 0 0 | 12 0 2211 ------------------------------------- 2212 13 0 14 | 15 16 17 | 0 0 2213 Proc1 0 18 0 | 19 20 21 | 0 0 2214 0 0 0 | 22 23 0 | 24 0 2215 ------------------------------------- 2216 Proc2 25 26 27 | 0 0 28 | 29 0 2217 30 0 0 | 31 32 33 | 0 34 2218 .ve 2219 2220 This can be represented as a collection of submatrices as: 2221 2222 .vb 2223 A B C 2224 D E F 2225 G H I 2226 .ve 2227 2228 Where the submatrices A,B,C are owned by proc0, D,E,F are 2229 owned by proc1, G,H,I are owned by proc2. 2230 2231 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2232 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2233 The 'M','N' parameters are 8,8, and have the same values on all procs. 2234 2235 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2236 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2237 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2238 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2239 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2240 matrix, ans [DF] as another SeqAIJ matrix. 2241 2242 When d_nz, o_nz parameters are specified, d_nz storage elements are 2243 allocated for every row of the local diagonal submatrix, and o_nz 2244 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2245 One way to choose d_nz and o_nz is to use the max nonzerors per local 2246 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2247 In this case, the values of d_nz,o_nz are: 2248 .vb 2249 proc0 : dnz = 2, o_nz = 2 2250 proc1 : dnz = 3, o_nz = 2 2251 proc2 : dnz = 1, o_nz = 4 2252 .ve 2253 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2254 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2255 for proc3. i.e we are using 12+15+10=37 storage locations to store 2256 34 values. 2257 2258 When d_nnz, o_nnz parameters are specified, the storage is specified 2259 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2260 In the above case the values for d_nnz,o_nnz are: 2261 .vb 2262 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2263 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2264 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2265 .ve 2266 Here the space allocated is sum of all the above values i.e 34, and 2267 hence pre-allocation is perfect. 2268 2269 Level: intermediate 2270 2271 .keywords: matrix, aij, compressed row, sparse, parallel 2272 2273 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2274 @*/ 2275 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2276 { 2277 int ierr,size; 2278 2279 PetscFunctionBegin; 2280 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2281 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2282 if (size > 1) { 2283 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2284 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2285 } else { 2286 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2287 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2288 } 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2294 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2295 { 2296 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2297 PetscFunctionBegin; 2298 *Ad = a->A; 2299 *Ao = a->B; 2300 *colmap = a->garray; 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2306 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2307 { 2308 int ierr; 2309 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2310 2311 PetscFunctionBegin; 2312 if (coloring->ctype == IS_COLORING_LOCAL) { 2313 int *allcolors,*colors,i; 2314 ISColoring ocoloring; 2315 2316 /* set coloring for diagonal portion */ 2317 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2318 2319 /* set coloring for off-diagonal portion */ 2320 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2321 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2322 for (i=0; i<a->B->n; i++) { 2323 colors[i] = allcolors[a->garray[i]]; 2324 } 2325 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2326 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2327 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2328 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2329 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2330 int *colors,i,*larray; 2331 ISColoring ocoloring; 2332 2333 /* set coloring for diagonal portion */ 2334 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2335 for (i=0; i<a->A->n; i++) { 2336 larray[i] = i + a->cstart; 2337 } 2338 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2339 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2340 for (i=0; i<a->A->n; i++) { 2341 colors[i] = coloring->colors[larray[i]]; 2342 } 2343 ierr = PetscFree(larray);CHKERRQ(ierr); 2344 ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2345 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2346 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2347 2348 /* set coloring for off-diagonal portion */ 2349 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2350 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2351 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2352 for (i=0; i<a->B->n; i++) { 2353 colors[i] = coloring->colors[larray[i]]; 2354 } 2355 ierr = PetscFree(larray);CHKERRQ(ierr); 2356 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2357 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2358 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2359 } else { 2360 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2361 } 2362 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2368 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2369 { 2370 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2371 int ierr; 2372 2373 PetscFunctionBegin; 2374 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2375 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2381 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2382 { 2383 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2384 int ierr; 2385 2386 PetscFunctionBegin; 2387 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2388 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391