/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);
EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
   at a slightly higher hash table cost; without it, it is not scalable
   (each processor has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
 \
    if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (int*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
    for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                       len*sizeof(int));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                       len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
 \
    ierr = \
PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = PETSC_TRUE; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for (ii=N; ii>=_i; ii--) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for (_i=low; _i<high; _i++) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 145 PetscScalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 151 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 152 new_j = (int*)(new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 157 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = PETSC_TRUE; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for (ii=N; ii>=_i; ii--) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ 
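/*
   A minimal usage sketch for the routine below (illustrative only, not part of
   the original source; assumes a parallel AIJ matrix created with
   MatCreateMPIAIJ() and globally numbered indices): rows owned by this process
   are inserted directly into the diagonal (A) or off-diagonal (B) block, while
   rows owned elsewhere are stashed and shipped to their owner at assembly time.

     Mat         A;
     int         ierr,row = 10,col = 20;
     PetscScalar val = 1.0;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/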
"MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 198 PetscScalar value; 199 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 200 int cstart = aij->cstart,cend = aij->cend,row,col; 201 PetscTruth roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 207 PetscScalar *aa = a->a; 208 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 211 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 212 PetscScalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 PetscScalar *ap; 217 218 PetscFunctionBegin; 219 for (i=0; i<m; i++) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 if (ignorezeroentries && value == 0.0) continue; 231 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ*)B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 if (ignorezeroentries && value == 0.0) continue; 260 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 261 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 262 } 263 } 264 } else { 265 if (!aij->donotstash) { 266 if (roworiented) { 267 if (ignorezeroentries && v[i*n] == 0.0) continue; 268 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 269 } else { 270 if (ignorezeroentries && v[i] == 0.0) continue; 271 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 272 } 273 } 274 } 275 } 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatGetValues_MPIAIJ" 281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 282 { 283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 284 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 285 int cstart = aij->cstart,cend = aij->cend,row,col; 286 287 PetscFunctionBegin; 288 for (i=0; i<m; i++) { 289 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 290 if (idxm[i] >= 
mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
  PetscTruth  flag;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 384 /* 385 if nonzero structure of submatrix B cannot change then we know that 386 no processor disassembled thus we can skip this stuff 387 */ 388 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 389 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 390 if (mat->was_assembled && !other_disassembled) { 391 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 392 } 393 } 394 395 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 396 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 397 } 398 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 399 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 400 401 if (aij->rowvalues) { 402 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 403 aij->rowvalues = 0; 404 } 405 #if defined(PETSC_HAVE_SUPERLUDIST) 406 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 407 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 408 #endif 409 PetscFunctionReturn(0); 410 } 411 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 414 int MatZeroEntries_MPIAIJ(Mat A) 415 { 416 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 417 int ierr; 418 419 PetscFunctionBegin; 420 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 421 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatZeroRows_MPIAIJ" 427 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 428 { 429 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 430 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 431 int *procs,*nprocs,j,idx,nsends,*work,row; 432 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 433 int *rvalues,tag = A->tag,count,base,slen,n,*source; 434 int *lens,imdex,*lrows,*values,rstart=l->rstart; 435 MPI_Comm comm = A->comm; 436 MPI_Request *send_waits,*recv_waits; 437 MPI_Status recv_status,*send_status; 438 IS istmp; 439 PetscTruth found; 440 441 PetscFunctionBegin; 442 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 443 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 444 445 /* first count number of contributors to each processor */ 446 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 447 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 448 procs = nprocs + size; 449 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 450 for (i=0; i<N; i++) { 451 idx = rows[i]; 452 found = PETSC_FALSE; 453 for (j=0; j<size; j++) { 454 if (idx >= owners[j] && idx < owners[j+1]) { 455 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 456 } 457 } 458 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 459 } 460 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 461 462 /* inform other processors of number of messages and max length*/ 463 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 464 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 465 nrecvs = work[size+rank]; 466 nmax = work[rank]; 467 ierr = PetscFree(work);CHKERRQ(ierr); 468 469 /* post receives: */ 470 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 471 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 472 for (i=0; i<nrecvs; i++) { 473 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 474 } 475 476 /* do sends: 477 1) starts[i] gives the starting index in svalues for stuff going to 478 the ith processor 479 */ 480 ierr = 
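  /*
     Usage sketch for this routine (illustrative only, not part of the original
     source; assumes an assembled MPIAIJ matrix A): each process may list any
     GLOBAL rows, not only the ones it owns; the communication below forwards
     each row number to the owning process before the rows are zeroed.

       IS          is;
       int         ierr,rows[2] = {0,5};
       PetscScalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);    puts 1.0 on the zeroed diagonal entries
       ierr = ISDestroy(is);CHKERRQ(ierr);
  */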
PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry
     (which would allocate new memory, causing lots of mallocs and slowing things down).
540 541 Contributed by: Mathew Knepley 542 */ 543 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 544 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 545 if (diag && (l->A->M == l->A->N)) { 546 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 547 } else if (diag) { 548 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 549 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 550 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 552 } 553 for (i = 0; i < slen; i++) { 554 row = lrows[i] + rstart; 555 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 556 } 557 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 } else { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 } 562 ierr = ISDestroy(istmp);CHKERRQ(ierr); 563 ierr = PetscFree(lrows);CHKERRQ(ierr); 564 565 /* wait on sends */ 566 if (nsends) { 567 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 568 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 569 ierr = PetscFree(send_status);CHKERRQ(ierr); 570 } 571 ierr = PetscFree(send_waits);CHKERRQ(ierr); 572 ierr = PetscFree(svalues);CHKERRQ(ierr); 573 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatMult_MPIAIJ" 579 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 580 { 581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 582 int ierr,nt; 583 584 PetscFunctionBegin; 585 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 586 if (nt != A->n) { 587 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 588 } 589 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 590 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 591 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatMultAdd_MPIAIJ" 598 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 599 { 600 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 601 int ierr; 602 603 PetscFunctionBegin; 604 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 605 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 606 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 607 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 608 PetscFunctionReturn(0); 609 } 610 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 613 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 614 { 615 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 616 int ierr; 617 618 PetscFunctionBegin; 619 /* do nondiagonal part */ 620 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 621 /* send it on its way */ 622 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 623 /* do local part */ 624 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 625 /* receive remote parts: note this assumes the values are not actually */ 626 /* inserted in yy until the next line, which is true for my implementation*/ 627 /* but is not perhaps always true. 
*/ 628 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 629 PetscFunctionReturn(0); 630 } 631 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 634 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 635 { 636 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 637 int ierr; 638 639 PetscFunctionBegin; 640 /* do nondiagonal part */ 641 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 642 /* send it on its way */ 643 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 /* do local part */ 645 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 646 /* receive remote parts: note this assumes the values are not actually */ 647 /* inserted in yy until the next line, which is true for my implementation*/ 648 /* but is not perhaps always true. */ 649 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 /* 654 This only works correctly for square matrices where the subblock A->A is the 655 diagonal block 656 */ 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 659 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 660 { 661 int ierr; 662 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 663 664 PetscFunctionBegin; 665 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 666 if (a->rstart != a->cstart || a->rend != a->cend) { 667 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 668 } 669 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 670 PetscFunctionReturn(0); 671 } 672 673 #undef __FUNCT__ 674 #define __FUNCT__ "MatScale_MPIAIJ" 675 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 676 { 677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 678 int ierr; 679 680 PetscFunctionBegin; 681 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 682 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatDestroy_MPIAIJ" 688 int MatDestroy_MPIAIJ(Mat mat) 689 { 690 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 691 int ierr; 692 693 PetscFunctionBegin; 694 #if defined(PETSC_USE_LOG) 695 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 696 #endif 697 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 698 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 699 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 700 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 701 #if defined (PETSC_USE_CTABLE) 702 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 703 #else 704 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 705 #endif 706 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 707 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 708 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 709 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 710 ierr = PetscFree(aij);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 716 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 717 { 718 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 719 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 720 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 721 PetscTruth isdraw,isascii,flg; 722 PetscViewer sviewer; 723 PetscViewerFormat format; 724 725 PetscFunctionBegin; 726 ierr = 
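  /*
     Usage sketch (illustrative only, not part of the original source; assumes an
     assembled matrix mat): the same routine serves ASCII, draw and socket viewers;
     with the ASCII viewer in PETSC_VIEWER_ASCII_INFO_LONG format it prints the
     per-process nonzero statistics gathered below instead of the matrix entries.

       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = MatView(mat,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  */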
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor.
*/ 764 Mat A; 765 Mat_SeqAIJ *Aloc; 766 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 767 PetscScalar *a; 768 769 if (!rank) { 770 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 771 } else { 772 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 773 } 774 PetscLogObjectParent(mat,A); 775 776 /* copy over the A part */ 777 Aloc = (Mat_SeqAIJ*)aij->A->data; 778 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 779 row = aij->rstart; 780 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 781 for (i=0; i<m; i++) { 782 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 783 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 784 } 785 aj = Aloc->j; 786 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 787 788 /* copy over the B part */ 789 Aloc = (Mat_SeqAIJ*)aij->B->data; 790 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 791 row = aij->rstart; 792 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 793 ct = cols; 794 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 795 for (i=0; i<m; i++) { 796 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 797 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 798 } 799 ierr = PetscFree(ct);CHKERRQ(ierr); 800 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 801 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 802 /* 803 Everyone has to call to draw the matrix since the graphics waits are 804 synchronized across all processors that share the PetscDraw object 805 */ 806 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 807 if (!rank) { 808 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 809 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 810 } 811 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 812 ierr = MatDestroy(A);CHKERRQ(ierr); 813 } 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatView_MPIAIJ" 819 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 820 { 821 int ierr; 822 PetscTruth isascii,isdraw,issocket,isbinary; 823 824 PetscFunctionBegin; 825 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 826 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 827 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 828 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 829 if (isascii || isdraw || isbinary || issocket) { 830 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 831 } else { 832 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 833 } 834 PetscFunctionReturn(0); 835 } 836 837 838 839 #undef __FUNCT__ 840 #define __FUNCT__ "MatRelax_MPIAIJ" 841 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 842 { 843 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 844 int ierr; 845 Vec bb1; 846 PetscScalar mone=-1.0; 847 848 PetscFunctionBegin; 849 850 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 851 852 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 853 if (flag & SOR_ZERO_INITIAL_GUESS) { 854 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 855 its--; 856 } 857 858 while (its--) { 859 ierr 
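      /*
         Sketch of the update performed by each pass of this loop (this matches the
         code that follows): with x_off the off-process entries of xx gathered into
         lvec,

             bb1 = bb - B*x_off
             xx  = local SOR sweep on A with right-hand side bb1

         i.e. the off-diagonal coupling is moved to the right-hand side and only
         the local diagonal block is relaxed.
      */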
= VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 860 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 861 862 /* update rhs: bb1 = bb - B*x */ 863 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 864 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 865 866 /* local sweep */ 867 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx); 868 CHKERRQ(ierr); 869 } 870 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 871 if (flag & SOR_ZERO_INITIAL_GUESS) { 872 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 873 its--; 874 } 875 while (its--) { 876 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 877 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 878 879 /* update rhs: bb1 = bb - B*x */ 880 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 881 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 882 883 /* local sweep */ 884 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 885 CHKERRQ(ierr); 886 } 887 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 888 if (flag & SOR_ZERO_INITIAL_GUESS) { 889 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 890 its--; 891 } 892 while (its--) { 893 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 894 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 895 896 /* update rhs: bb1 = bb - B*x */ 897 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 898 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 899 900 /* local sweep */ 901 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 902 CHKERRQ(ierr); 903 } 904 } else { 905 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 906 } 907 908 ierr = VecDestroy(bb1);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 #undef __FUNCT__ 913 #define __FUNCT__ "MatGetInfo_MPIAIJ" 914 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 915 { 916 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 917 Mat A = mat->A,B = mat->B; 918 int ierr; 919 PetscReal isend[5],irecv[5]; 920 921 PetscFunctionBegin; 922 info->block_size = 1.0; 923 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 924 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 925 isend[3] = info->memory; isend[4] = info->mallocs; 926 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 927 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 928 isend[3] += info->memory; isend[4] += info->mallocs; 929 if (flag == MAT_LOCAL) { 930 info->nz_used = isend[0]; 931 info->nz_allocated = isend[1]; 932 info->nz_unneeded = isend[2]; 933 info->memory = isend[3]; 934 info->mallocs = isend[4]; 935 } else if (flag == MAT_GLOBAL_MAX) { 936 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 937 info->nz_used = irecv[0]; 938 info->nz_allocated = irecv[1]; 939 info->nz_unneeded = irecv[2]; 940 info->memory = irecv[3]; 941 info->mallocs = irecv[4]; 942 } else if (flag == MAT_GLOBAL_SUM) { 943 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 944 info->nz_used = irecv[0]; 945 info->nz_allocated = irecv[1]; 946 info->nz_unneeded = irecv[2]; 947 
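  /*
     Usage sketch (illustrative only, not part of the original source; assumes an
     assembled matrix mat): callers may request the local numbers or the MAX/SUM
     reductions computed here.

       MatInfo info;
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros %g  mallocs %g\n",
                          info.nz_used,info.mallocs);CHKERRQ(ierr);
  */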
info->memory = irecv[3]; 948 info->mallocs = irecv[4]; 949 } 950 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 951 info->fill_ratio_needed = 0; 952 info->factor_mallocs = 0; 953 info->rows_global = (double)matin->M; 954 info->columns_global = (double)matin->N; 955 info->rows_local = (double)matin->m; 956 info->columns_local = (double)matin->N; 957 958 PetscFunctionReturn(0); 959 } 960 961 #undef __FUNCT__ 962 #define __FUNCT__ "MatSetOption_MPIAIJ" 963 int MatSetOption_MPIAIJ(Mat A,MatOption op) 964 { 965 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 966 int ierr; 967 968 PetscFunctionBegin; 969 switch (op) { 970 case MAT_NO_NEW_NONZERO_LOCATIONS: 971 case MAT_YES_NEW_NONZERO_LOCATIONS: 972 case MAT_COLUMNS_UNSORTED: 973 case MAT_COLUMNS_SORTED: 974 case MAT_NEW_NONZERO_ALLOCATION_ERR: 975 case MAT_KEEP_ZEROED_ROWS: 976 case MAT_NEW_NONZERO_LOCATION_ERR: 977 case MAT_USE_INODES: 978 case MAT_DO_NOT_USE_INODES: 979 case MAT_IGNORE_ZERO_ENTRIES: 980 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 981 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 982 break; 983 case MAT_ROW_ORIENTED: 984 a->roworiented = PETSC_TRUE; 985 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 986 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 987 break; 988 case MAT_ROWS_SORTED: 989 case MAT_ROWS_UNSORTED: 990 case MAT_YES_NEW_DIAGONALS: 991 case MAT_USE_SINGLE_PRECISION_SOLVES: 992 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 993 break; 994 case MAT_COLUMN_ORIENTED: 995 a->roworiented = PETSC_FALSE; 996 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 997 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 998 break; 999 case MAT_IGNORE_OFF_PROC_ENTRIES: 1000 a->donotstash = PETSC_TRUE; 1001 break; 1002 case MAT_NO_NEW_DIAGONALS: 1003 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1004 break; 1005 default: 1006 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1007 break; 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatGetRow_MPIAIJ" 1014 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1015 { 1016 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1017 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1018 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1019 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1020 int *cmap,*idx_p; 1021 1022 PetscFunctionBegin; 1023 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1024 mat->getrowactive = PETSC_TRUE; 1025 1026 if (!mat->rowvalues && (idx || v)) { 1027 /* 1028 allocate enough space to hold information from the longest row. 
1029 */ 1030 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1031 int max = 1,tmp; 1032 for (i=0; i<matin->m; i++) { 1033 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1034 if (max < tmp) { max = tmp; } 1035 } 1036 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1037 mat->rowindices = (int*)(mat->rowvalues + max); 1038 } 1039 1040 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1041 lrow = row - rstart; 1042 1043 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1044 if (!v) {pvA = 0; pvB = 0;} 1045 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1046 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1047 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1048 nztot = nzA + nzB; 1049 1050 cmap = mat->garray; 1051 if (v || idx) { 1052 if (nztot) { 1053 /* Sort by increasing column numbers, assuming A and B already sorted */ 1054 int imark = -1; 1055 if (v) { 1056 *v = v_p = mat->rowvalues; 1057 for (i=0; i<nzB; i++) { 1058 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1059 else break; 1060 } 1061 imark = i; 1062 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1063 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1064 } 1065 if (idx) { 1066 *idx = idx_p = mat->rowindices; 1067 if (imark > -1) { 1068 for (i=0; i<imark; i++) { 1069 idx_p[i] = cmap[cworkB[i]]; 1070 } 1071 } else { 1072 for (i=0; i<nzB; i++) { 1073 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1074 else break; 1075 } 1076 imark = i; 1077 } 1078 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1079 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1080 } 1081 } else { 1082 if (idx) *idx = 0; 1083 if (v) *v = 0; 1084 } 1085 } 1086 *nz = nztot; 1087 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1088 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1094 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1095 { 1096 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1097 1098 PetscFunctionBegin; 1099 if (aij->getrowactive == PETSC_FALSE) { 1100 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1101 } 1102 aij->getrowactive = PETSC_FALSE; 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatNorm_MPIAIJ" 1108 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1109 { 1110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1111 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1112 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1113 PetscReal sum = 0.0; 1114 PetscScalar *v; 1115 1116 PetscFunctionBegin; 1117 if (aij->size == 1) { 1118 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1119 } else { 1120 if (type == NORM_FROBENIUS) { 1121 v = amat->a; 1122 for (i=0; i<amat->nz; i++) { 1123 #if defined(PETSC_USE_COMPLEX) 1124 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1125 #else 1126 sum += (*v)*(*v); v++; 1127 #endif 1128 } 1129 v = bmat->a; 1130 for (i=0; i<bmat->nz; i++) { 1131 #if defined(PETSC_USE_COMPLEX) 1132 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1133 #else 1134 sum += (*v)*(*v); v++; 1135 #endif 1136 } 1137 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1138 *norm = sqrt(*norm); 1139 } else if (type == NORM_1) { /* max column norm */ 1140 
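      /*
         For reference (this matches the code below): NORM_1 is the maximum column
         sum max_j sum_i |a_ij|, accumulated into a length-N array indexed by global
         column and reduced with MPI_SUM; NORM_INFINITY further down is the maximum
         row sum and needs only a local pass plus an MPI_MAX reduction. A usage
         sketch (assumes an assembled matrix mat):

           PetscReal nrm;
           ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);
      */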
PetscReal *tmp,*tmp2; 1141 int *jj,*garray = aij->garray; 1142 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1143 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1144 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1145 *norm = 0.0; 1146 v = amat->a; jj = amat->j; 1147 for (j=0; j<amat->nz; j++) { 1148 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1149 } 1150 v = bmat->a; jj = bmat->j; 1151 for (j=0; j<bmat->nz; j++) { 1152 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1153 } 1154 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1155 for (j=0; j<mat->N; j++) { 1156 if (tmp2[j] > *norm) *norm = tmp2[j]; 1157 } 1158 ierr = PetscFree(tmp);CHKERRQ(ierr); 1159 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1160 } else if (type == NORM_INFINITY) { /* max row norm */ 1161 PetscReal ntemp = 0.0; 1162 for (j=0; j<aij->A->m; j++) { 1163 v = amat->a + amat->i[j] + shift; 1164 sum = 0.0; 1165 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1166 sum += PetscAbsScalar(*v); v++; 1167 } 1168 v = bmat->a + bmat->i[j] + shift; 1169 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1170 sum += PetscAbsScalar(*v); v++; 1171 } 1172 if (sum > ntemp) ntemp = sum; 1173 } 1174 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1175 } else { 1176 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1177 } 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatTranspose_MPIAIJ" 1184 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1185 { 1186 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1187 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1188 int ierr,shift = Aloc->indexshift; 1189 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1190 Mat B; 1191 PetscScalar *array; 1192 1193 PetscFunctionBegin; 1194 if (!matout && M != N) { 1195 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1196 } 1197 1198 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1199 1200 /* copy over the A part */ 1201 Aloc = (Mat_SeqAIJ*)a->A->data; 1202 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1203 row = a->rstart; 1204 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1205 for (i=0; i<m; i++) { 1206 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1207 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1208 } 1209 aj = Aloc->j; 1210 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1211 1212 /* copy over the B part */ 1213 Aloc = (Mat_SeqAIJ*)a->B->data; 1214 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1215 row = a->rstart; 1216 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1217 ct = cols; 1218 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1219 for (i=0; i<m; i++) { 1220 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1221 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1222 } 1223 ierr = PetscFree(ct);CHKERRQ(ierr); 1224 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1225 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1226 if (matout) { 1227 *matout = B; 1228 } else { 1229 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1230 } 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1236 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1237 { 1238 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1239 Mat a = aij->A,b 
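  /*
     This routine forms mat = diag(ll) * mat * diag(rr). Usage sketch (illustrative
     only; ll is distributed like the rows of mat, rr like its columns, and either
     may be PETSC_NULL to skip that side):

       ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);
  */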
= aij->B; 1240 int ierr,s1,s2,s3; 1241 1242 PetscFunctionBegin; 1243 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1244 if (rr) { 1245 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1246 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1247 /* Overlap communication with computation. */ 1248 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1249 } 1250 if (ll) { 1251 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1252 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1253 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1254 } 1255 /* scale the diagonal block */ 1256 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1257 1258 if (rr) { 1259 /* Do a scatter end and then right scale the off-diagonal block */ 1260 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1261 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1262 } 1263 1264 PetscFunctionReturn(0); 1265 } 1266 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1270 int MatPrintHelp_MPIAIJ(Mat A) 1271 { 1272 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1273 int ierr; 1274 1275 PetscFunctionBegin; 1276 if (!a->rank) { 1277 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1278 } 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1284 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1285 { 1286 PetscFunctionBegin; 1287 *bs = 1; 1288 PetscFunctionReturn(0); 1289 } 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1292 int MatSetUnfactored_MPIAIJ(Mat A) 1293 { 1294 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1295 int ierr; 1296 1297 PetscFunctionBegin; 1298 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatEqual_MPIAIJ" 1304 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1305 { 1306 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1307 Mat a,b,c,d; 1308 PetscTruth flg; 1309 int ierr; 1310 1311 PetscFunctionBegin; 1312 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1313 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1314 a = matA->A; b = matA->B; 1315 c = matB->A; d = matB->B; 1316 1317 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1318 if (flg == PETSC_TRUE) { 1319 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1320 } 1321 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatCopy_MPIAIJ" 1327 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1328 { 1329 int ierr; 1330 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1331 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1332 PetscTruth flg; 1333 1334 PetscFunctionBegin; 1335 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1336 if (str != SAME_NONZERO_PATTERN || !flg) { 1337 /* because of the column compression in the off-processor part of the matrix a->B, 1338 the number of columns in a->B and b->B may be different, hence we cannot call 1339 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1340 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1341 then copying the submatrices */ 1342 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1343 } else { 1344 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1345 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1346 } 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1352 int MatSetUpPreallocation_MPIAIJ(Mat A) 1353 { 1354 int ierr; 1355 1356 PetscFunctionBegin; 1357 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1362 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1363 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1364 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1365 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1366 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1367 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1368 #endif 1369 1370 /* -------------------------------------------------------------------*/ 1371 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1372 MatGetRow_MPIAIJ, 1373 MatRestoreRow_MPIAIJ, 1374 MatMult_MPIAIJ, 1375 MatMultAdd_MPIAIJ, 1376 MatMultTranspose_MPIAIJ, 1377 MatMultTransposeAdd_MPIAIJ, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 MatRelax_MPIAIJ, 1385 MatTranspose_MPIAIJ, 1386 MatGetInfo_MPIAIJ, 1387 MatEqual_MPIAIJ, 1388 MatGetDiagonal_MPIAIJ, 1389 MatDiagonalScale_MPIAIJ, 1390 MatNorm_MPIAIJ, 1391 MatAssemblyBegin_MPIAIJ, 1392 MatAssemblyEnd_MPIAIJ, 1393 0, 1394 MatSetOption_MPIAIJ, 1395 MatZeroEntries_MPIAIJ, 1396 MatZeroRows_MPIAIJ, 1397 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1398 MatLUFactorSymbolic_MPIAIJ_TFS, 1399 #else 1400 0, 1401 #endif 1402 0, 1403 0, 1404 0, 1405 MatSetUpPreallocation_MPIAIJ, 1406 0, 1407 0, 1408 0, 1409 0, 1410 MatDuplicate_MPIAIJ, 1411 0, 1412 0, 1413 0, 1414 0, 1415 0, 1416 MatGetSubMatrices_MPIAIJ, 1417 MatIncreaseOverlap_MPIAIJ, 1418 MatGetValues_MPIAIJ, 1419 MatCopy_MPIAIJ, 1420 MatPrintHelp_MPIAIJ, 1421 MatScale_MPIAIJ, 1422 0, 1423 0, 1424 0, 1425 MatGetBlockSize_MPIAIJ, 1426 0, 1427 0, 1428 0, 1429 0, 1430 MatFDColoringCreate_MPIAIJ, 1431 0, 1432 MatSetUnfactored_MPIAIJ, 1433 0, 1434 0, 1435 MatGetSubMatrix_MPIAIJ, 1436 MatDestroy_MPIAIJ, 1437 MatView_MPIAIJ, 1438 MatGetPetscMaps_Petsc, 1439 0, 1440 0, 1441 0, 1442 0, 1443 0, 1444 0, 1445 0, 1446 0, 1447 MatSetColoring_MPIAIJ, 1448 MatSetValuesAdic_MPIAIJ, 1449 MatSetValuesAdifor_MPIAIJ 1450 }; 1451 1452 /* ----------------------------------------------------------------------------------------*/ 1453 1454 EXTERN_C_BEGIN 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1457 int MatStoreValues_MPIAIJ(Mat mat) 1458 { 1459 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1460 int ierr; 1461 1462 PetscFunctionBegin; 1463 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1464 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467 EXTERN_C_END 1468 1469 EXTERN_C_BEGIN 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1472 int MatRetrieveValues_MPIAIJ(Mat mat) 1473 { 1474 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1475 int ierr; 1476 1477 PetscFunctionBegin; 1478 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1479 ierr = 
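  /*
     Usage sketch for the store/retrieve pair (illustrative only, not part of the
     original source; MatStoreValues() requires that the nonzero pattern is frozen
     first): save the numerical values of an assembled matrix, then later copy them
     back into a matrix with the same pattern, e.g. to reuse a constant part of the
     matrix on each nonlinear iteration.

       ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
       ierr = MatStoreValues(mat);CHKERRQ(ierr);
          ... later, after the values have been overwritten ...
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
  */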
MatRetrieveValues(aij->B);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 EXTERN_C_END 1483 1484 #include "petscpc.h" 1485 EXTERN_C_BEGIN 1486 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1487 EXTERN_C_END 1488 1489 EXTERN_C_BEGIN 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatCreate_MPIAIJ" 1492 int MatCreate_MPIAIJ(Mat B) 1493 { 1494 Mat_MPIAIJ *b; 1495 int ierr,i,size; 1496 PetscTruth flg; 1497 1498 PetscFunctionBegin; 1499 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1500 1501 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1502 B->data = (void*)b; 1503 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1504 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1505 B->factor = 0; 1506 B->assembled = PETSC_FALSE; 1507 B->mapping = 0; 1508 1509 B->insertmode = NOT_SET_VALUES; 1510 b->size = size; 1511 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1512 1513 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1514 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1515 1516 /* the information in the maps duplicates the information computed below, eventually 1517 we should remove the duplicate information that is not contained in the maps */ 1518 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1519 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1520 1521 /* build local table of row and column ownerships */ 1522 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1523 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1524 b->cowners = b->rowners + b->size + 2; 1525 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1526 b->rowners[0] = 0; 1527 for (i=2; i<=b->size; i++) { 1528 b->rowners[i] += b->rowners[i-1]; 1529 } 1530 b->rstart = b->rowners[b->rank]; 1531 b->rend = b->rowners[b->rank+1]; 1532 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1533 b->cowners[0] = 0; 1534 for (i=2; i<=b->size; i++) { 1535 b->cowners[i] += b->cowners[i-1]; 1536 } 1537 b->cstart = b->cowners[b->rank]; 1538 b->cend = b->cowners[b->rank+1]; 1539 1540 /* build cache for off array entries formed */ 1541 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1542 b->donotstash = PETSC_FALSE; 1543 b->colmap = 0; 1544 b->garray = 0; 1545 b->roworiented = PETSC_TRUE; 1546 1547 /* stuff used for matrix vector multiply */ 1548 b->lvec = PETSC_NULL; 1549 b->Mvctx = PETSC_NULL; 1550 1551 /* stuff for MatGetRow() */ 1552 b->rowindices = 0; 1553 b->rowvalues = 0; 1554 b->getrowactive = PETSC_FALSE; 1555 1556 /* #if defined(PETSC_HAVE_SUPERLUDIST) 1557 1558 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr); 1559 if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); } 1560 #endif */ 1561 1562 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1563 "MatStoreValues_MPIAIJ", 1564 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1565 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1566 "MatRetrieveValues_MPIAIJ", 1567 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1568 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1569 "MatGetDiagonalBlock_MPIAIJ", 1570 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1571 1572 PetscFunctionReturn(0); 1573 } 1574 EXTERN_C_END 1575 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1578 int 
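/*
   Usage sketch for the routine below (illustrative only, not part of the original
   source; assumes an assembled MPIAIJ matrix mat): the duplicate inherits the
   parallel layout, ownership ranges and nonzero structure, and optionally the
   numerical values.

     Mat C;
     ierr = MatDuplicate(mat,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/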
MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1579 { 1580 Mat mat; 1581 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1582 int ierr; 1583 1584 PetscFunctionBegin; 1585 *newmat = 0; 1586 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1587 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1588 a = (Mat_MPIAIJ*)mat->data; 1589 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1590 mat->factor = matin->factor; 1591 mat->assembled = PETSC_TRUE; 1592 mat->insertmode = NOT_SET_VALUES; 1593 mat->preallocated = PETSC_TRUE; 1594 1595 a->rstart = oldmat->rstart; 1596 a->rend = oldmat->rend; 1597 a->cstart = oldmat->cstart; 1598 a->cend = oldmat->cend; 1599 a->size = oldmat->size; 1600 a->rank = oldmat->rank; 1601 a->donotstash = oldmat->donotstash; 1602 a->roworiented = oldmat->roworiented; 1603 a->rowindices = 0; 1604 a->rowvalues = 0; 1605 a->getrowactive = PETSC_FALSE; 1606 1607 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1608 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1609 if (oldmat->colmap) { 1610 #if defined (PETSC_USE_CTABLE) 1611 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1612 #else 1613 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1614 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1615 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1616 #endif 1617 } else a->colmap = 0; 1618 if (oldmat->garray) { 1619 int len; 1620 len = oldmat->B->n; 1621 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1622 PetscLogObjectMemory(mat,len*sizeof(int)); 1623 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1624 } else a->garray = 0; 1625 1626 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1627 PetscLogObjectParent(mat,a->lvec); 1628 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1629 PetscLogObjectParent(mat,a->Mvctx); 1630 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1631 PetscLogObjectParent(mat,a->A); 1632 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1633 PetscLogObjectParent(mat,a->B); 1634 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1635 *newmat = mat; 1636 PetscFunctionReturn(0); 1637 } 1638 1639 #include "petscsys.h" 1640 1641 EXTERN_C_BEGIN 1642 #undef __FUNCT__ 1643 #define __FUNCT__ "MatLoad_MPIAIJ" 1644 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1645 { 1646 Mat A; 1647 PetscScalar *vals,*svals; 1648 MPI_Comm comm = ((PetscObject)viewer)->comm; 1649 MPI_Status status; 1650 int i,nz,ierr,j,rstart,rend,fd; 1651 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1652 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1653 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1654 1655 PetscFunctionBegin; 1656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1658 if (!rank) { 1659 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1660 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1661 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1662 if (header[3] < 0) { 1663 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1664 } 1665 } 1666 1667 ierr = 
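  /*
     Usage sketch (illustrative only, not part of the original source; assumes a
     file previously written with MatView() on a binary viewer, with the
     hypothetical name "matrix.dat"; the exact open-mode flag name depends on the
     PETSc version):

       PetscViewer viewer;
       Mat         A;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  */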
MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1668 M = header[1]; N = header[2]; 1669 /* determine ownership of all rows */ 1670 m = M/size + ((M % size) > rank); 1671 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1672 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1673 rowners[0] = 0; 1674 for (i=2; i<=size; i++) { 1675 rowners[i] += rowners[i-1]; 1676 } 1677 rstart = rowners[rank]; 1678 rend = rowners[rank+1]; 1679 1680 /* distribute row lengths to all processors */ 1681 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1682 offlens = ourlens + (rend-rstart); 1683 if (!rank) { 1684 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1685 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1686 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1687 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1688 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1689 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1690 } else { 1691 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1692 } 1693 1694 if (!rank) { 1695 /* calculate the number of nonzeros on each processor */ 1696 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1697 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1698 for (i=0; i<size; i++) { 1699 for (j=rowners[i]; j< rowners[i+1]; j++) { 1700 procsnz[i] += rowlengths[j]; 1701 } 1702 } 1703 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1704 1705 /* determine max buffer needed and allocate it */ 1706 maxnz = 0; 1707 for (i=0; i<size; i++) { 1708 maxnz = PetscMax(maxnz,procsnz[i]); 1709 } 1710 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1711 1712 /* read in my part of the matrix column indices */ 1713 nz = procsnz[0]; 1714 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1715 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1716 1717 /* read in every one elses and ship off */ 1718 for (i=1; i<size; i++) { 1719 nz = procsnz[i]; 1720 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1721 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1722 } 1723 ierr = PetscFree(cols);CHKERRQ(ierr); 1724 } else { 1725 /* determine buffer space needed for message */ 1726 nz = 0; 1727 for (i=0; i<m; i++) { 1728 nz += ourlens[i]; 1729 } 1730 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1731 1732 /* receive message of column indices*/ 1733 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1734 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1735 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1736 } 1737 1738 /* determine column ownership if matrix is not square */ 1739 if (N != M) { 1740 n = N/size + ((N % size) > rank); 1741 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1742 cstart = cend - n; 1743 } else { 1744 cstart = rstart; 1745 cend = rend; 1746 n = cend - cstart; 1747 } 1748 1749 /* loop over local rows, determining number of off diagonal entries */ 1750 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1751 jj = 0; 1752 for (i=0; i<m; i++) { 1753 for (j=0; j<ourlens[i]; j++) { 1754 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1755 jj++; 1756 } 1757 } 1758 1759 /* create our matrix */ 1760 for (i=0; i<m; i++) { 1761 ourlens[i] -= offlens[i]; 1762 } 1763 ierr = 
MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1764 A = *newmat; 1765 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1766 for (i=0; i<m; i++) { 1767 ourlens[i] += offlens[i]; 1768 } 1769 1770 if (!rank) { 1771 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1772 1773 /* read in my part of the matrix numerical values */ 1774 nz = procsnz[0]; 1775 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1776 1777 /* insert into matrix */ 1778 jj = rstart; 1779 smycols = mycols; 1780 svals = vals; 1781 for (i=0; i<m; i++) { 1782 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1783 smycols += ourlens[i]; 1784 svals += ourlens[i]; 1785 jj++; 1786 } 1787 1788 /* read in the values for the other processors and ship them off */ 1789 for (i=1; i<size; i++) { 1790 nz = procsnz[i]; 1791 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1792 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1793 } 1794 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1795 } else { 1796 /* receive numeric values */ 1797 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1798 1799 /* receive message of values */ 1800 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1801 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1802 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1803 1804 /* insert into matrix */ 1805 jj = rstart; 1806 smycols = mycols; 1807 svals = vals; 1808 for (i=0; i<m; i++) { 1809 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1810 smycols += ourlens[i]; 1811 svals += ourlens[i]; 1812 jj++; 1813 } 1814 } 1815 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1816 ierr = PetscFree(vals);CHKERRQ(ierr); 1817 ierr = PetscFree(mycols);CHKERRQ(ierr); 1818 ierr = PetscFree(rowners);CHKERRQ(ierr); 1819 1820 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1821 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 EXTERN_C_END 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 1828 /* 1829 Not great since it makes two copies of the submatrix: first a SeqAIJ copy on each process, 1830 and then the final result assembled by concatenating those local matrices.
1831 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1832 */ 1833 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1834 { 1835 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1836 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1837 Mat *local,M,Mreuse; 1838 PetscScalar *vwork,*aa; 1839 MPI_Comm comm = mat->comm; 1840 Mat_SeqAIJ *aij; 1841 1842 1843 PetscFunctionBegin; 1844 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1845 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1846 1847 if (call == MAT_REUSE_MATRIX) { 1848 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1849 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1850 local = &Mreuse; 1851 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1852 } else { 1853 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1854 Mreuse = *local; 1855 ierr = PetscFree(local);CHKERRQ(ierr); 1856 } 1857 1858 /* 1859 m - number of local rows 1860 n - number of columns (same on all processors) 1861 rstart - first row in new global matrix generated 1862 */ 1863 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1864 if (call == MAT_INITIAL_MATRIX) { 1865 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1866 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1867 ii = aij->i; 1868 jj = aij->j; 1869 1870 /* 1871 Determine the number of non-zeros in the diagonal and off-diagonal 1872 portions of the matrix in order to do correct preallocation 1873 */ 1874 1875 /* first get start and end of "diagonal" columns */ 1876 if (csize == PETSC_DECIDE) { 1877 nlocal = n/size + ((n % size) > rank); 1878 } else { 1879 nlocal = csize; 1880 } 1881 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1882 rstart = rend - nlocal; 1883 if (rank == size - 1 && rend != n) { 1884 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1885 } 1886 1887 /* next, compute all the lengths */ 1888 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1889 olens = dlens + m; 1890 for (i=0; i<m; i++) { 1891 jend = ii[i+1] - ii[i]; 1892 olen = 0; 1893 dlen = 0; 1894 for (j=0; j<jend; j++) { 1895 if (*jj < rstart || *jj >= rend) olen++; 1896 else dlen++; 1897 jj++; 1898 } 1899 olens[i] = olen; 1900 dlens[i] = dlen; 1901 } 1902 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1903 ierr = PetscFree(dlens);CHKERRQ(ierr); 1904 } else { 1905 int ml,nl; 1906 1907 M = *newmat; 1908 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1909 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1910 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1911 /* 1912 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1913 rather than the slower MatSetValues(). 
1914 */ 1915 M->was_assembled = PETSC_TRUE; 1916 M->assembled = PETSC_FALSE; 1917 } 1918 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1919 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1920 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1921 ii = aij->i; 1922 jj = aij->j; 1923 aa = aij->a; 1924 for (i=0; i<m; i++) { 1925 row = rstart + i; 1926 nz = ii[i+1] - ii[i]; 1927 cwork = jj; jj += nz; 1928 vwork = aa; aa += nz; 1929 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1930 } 1931 1932 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1933 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1934 *newmat = M; 1935 1936 /* save submatrix used in processor for next request */ 1937 if (call == MAT_INITIAL_MATRIX) { 1938 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1939 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1940 } 1941 1942 PetscFunctionReturn(0); 1943 } 1944 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "MatMPIAIJSetPreallocation" 1947 /*@C 1948 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format 1949 (the default parallel PETSc format). For good matrix assembly performance 1950 the user should preallocate the matrix storage by setting the parameters 1951 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1952 performance can be increased by more than a factor of 50. 1953 1954 Collective on MPI_Comm 1955 1956 Input Parameters: 1957 + B - the matrix 1958 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1959 (same value is used for all local rows) 1960 . d_nnz - array containing the number of nonzeros in the various rows of the 1961 DIAGONAL portion of the local submatrix (possibly different for each row) 1962 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1963 The size of this array is equal to the number of local rows, i.e. 'm'. 1964 You must leave room for the diagonal entry even if it is zero. 1965 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1966 submatrix (same value is used for all local rows). 1967 - o_nnz - array containing the number of nonzeros in the various rows of the 1968 OFF-DIAGONAL portion of the local submatrix (possibly different for 1969 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1970 structure. The size of this array is equal to the number 1971 of local rows, i.e. 'm'. 1972 1973 The AIJ format (also called the Yale sparse matrix format or 1974 compressed row storage) is fully compatible with standard Fortran 77 1975 storage. That is, the stored row and column indices can begin at 1976 either one (as in Fortran) or zero. See the users manual for details. 1977 1978 The user MUST specify either the local or global matrix dimensions 1979 (possibly both). 1980 1981 The parallel matrix is partitioned such that the first m0 rows belong to 1982 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1983 to process 2, etc., where m0,m1,m2... are given by the input parameter 'm'. 1984 1985 The DIAGONAL portion of the local submatrix of a processor can be defined 1986 as the submatrix that is obtained by extracting the part corresponding 1987 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1988 first row that belongs to the processor, and r2 is the last row belonging 1989 to this processor. This is a square mxm matrix.
The remaining portion 1990 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 1991 1992 If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored. 1993 1994 By default, this format uses inodes (identical nodes) when possible. 1995 We search for consecutive rows with the same nonzero structure, thereby 1996 reusing matrix information to achieve increased efficiency. 1997 1998 Options Database Keys: 1999 + -mat_aij_no_inode - Do not use inodes 2000 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2001 - -mat_aij_oneindex - Internally use indexing starting at 1 2002 rather than 0. Note that when calling MatSetValues(), 2003 the user still MUST index entries starting at 0! 2004 2005 Example usage: 2006 2007 Consider the following 8x8 matrix with 34 non-zero values, that is 2008 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2009 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2010 as follows: 2011 2012 .vb 2013 1 2 0 | 0 3 0 | 0 4 2014 Proc0 0 5 6 | 7 0 0 | 8 0 2015 9 0 10 | 11 0 0 | 12 0 2016 ------------------------------------- 2017 13 0 14 | 15 16 17 | 0 0 2018 Proc1 0 18 0 | 19 20 21 | 0 0 2019 0 0 0 | 22 23 0 | 24 0 2020 ------------------------------------- 2021 Proc2 25 26 27 | 0 0 28 | 29 0 2022 30 0 0 | 31 32 33 | 0 34 2023 .ve 2024 2025 This can be represented as a collection of submatrices as: 2026 2027 .vb 2028 A B C 2029 D E F 2030 G H I 2031 .ve 2032 2033 Where the submatrices A,B,C are owned by proc0, D,E,F are 2034 owned by proc1, and G,H,I are owned by proc2. 2035 2036 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2037 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2038 The 'M','N' parameters are 8,8, and have the same values on all procs. 2039 2040 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2041 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2042 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2043 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2044 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2045 matrix and [DF] as another SeqAIJ matrix. 2046 2047 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2048 allocated for every row of the local diagonal submatrix, and o_nz 2049 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2050 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2051 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2052 In this case, the values of d_nz,o_nz are: 2053 .vb 2054 proc0 : d_nz = 2, o_nz = 2 2055 proc1 : d_nz = 3, o_nz = 2 2056 proc2 : d_nz = 1, o_nz = 4 2057 .ve 2058 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2059 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2060 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2061 34 values. 2062 2063 When the d_nnz, o_nnz parameters are specified, the storage is specified 2064 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2065 In the above case the values for d_nnz,o_nnz are: 2066 .vb 2067 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2068 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2069 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2070 .ve 2071 Here the space allocated is the sum of all the above values, i.e., 34, and 2072 hence the preallocation is exact.
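   As a rough calling sketch (not a complete program), proc0 in the example above could
   set up its preallocation as follows; proc1 and proc2 would pass their own local sizes
   and arrays. The communicator choice, the declaration of ierr, and the later calls to
   MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() are assumed to be supplied by
   the surrounding code, and the array values are simply taken from the d_nnz/o_nnz
   table above (the scalar d_nz/o_nz arguments are ignored when the arrays are given):
.vb
      Mat A;
      int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

      ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve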
2073 2074 Level: intermediate 2075 2076 .keywords: matrix, aij, compressed row, sparse, parallel 2077 2078 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2079 @*/ 2080 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2081 { 2082 Mat_MPIAIJ *b; 2083 int ierr,i; 2084 PetscTruth flg2; 2085 2086 PetscFunctionBegin; 2087 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2088 if (!flg2) PetscFunctionReturn(0); 2089 B->preallocated = PETSC_TRUE; 2090 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2091 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2092 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2093 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2094 if (d_nnz) { 2095 for (i=0; i<B->m; i++) { 2096 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2097 } 2098 } 2099 if (o_nnz) { 2100 for (i=0; i<B->m; i++) { 2101 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2102 } 2103 } 2104 b = (Mat_MPIAIJ*)B->data; 2105 2106 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2107 PetscLogObjectParent(B,b->A); 2108 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2109 PetscLogObjectParent(B,b->B); 2110 2111 PetscFunctionReturn(0); 2112 } 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatCreateMPIAIJ" 2116 /*@C 2117 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2118 (the default parallel PETSc format). For good matrix assembly performance 2119 the user should preallocate the matrix storage by setting the parameters 2120 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2121 performance can be increased by more than a factor of 50. 2122 2123 Collective on MPI_Comm 2124 2125 Input Parameters: 2126 + comm - MPI communicator 2127 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2128 This value should be the same as the local size used in creating the 2129 y vector for the matrix-vector product y = Ax. 2130 . n - This value should be the same as the local size used in creating the 2131 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2132 calculated if N is given) For square matrices n is almost always m. 2133 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2134 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2135 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2136 (same value is used for all local rows) 2137 . d_nnz - array containing the number of nonzeros in the various rows of the 2138 DIAGONAL portion of the local submatrix (possibly different for each row) 2139 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2140 The size of this array is equal to the number of local rows, i.e 'm'. 2141 You must leave room for the diagonal entry even if it is zero. 2142 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2143 submatrix (same value is used for all local rows). 2144 - o_nnz - array containing the number of nonzeros in the various rows of the 2145 OFF-DIAGONAL portion of the local submatrix (possibly different for 2146 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2147 structure. 
The size of this array is equal to the number 2148 of local rows, i.e. 'm'. 2149 2150 Output Parameter: 2151 . A - the matrix 2152 2153 Notes: 2154 The m,n,M,N parameters specify the size of the matrix and its partitioning across 2155 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2156 storage requirements for this matrix. 2157 2158 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2159 processor then it must be used on all processors that share the object for 2160 that argument. 2161 2162 The AIJ format (also called the Yale sparse matrix format or 2163 compressed row storage) is fully compatible with standard Fortran 77 2164 storage. That is, the stored row and column indices can begin at 2165 either one (as in Fortran) or zero. See the users manual for details. 2166 2167 The user MUST specify either the local or global matrix dimensions 2168 (possibly both). 2169 2170 The parallel matrix is partitioned such that the first m0 rows belong to 2171 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2172 to process 2, etc., where m0,m1,m2... are given by the input parameter 'm'. 2173 2174 The DIAGONAL portion of the local submatrix of a processor can be defined 2175 as the submatrix that is obtained by extracting the part corresponding 2176 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2177 first row that belongs to the processor, and r2 is the last row belonging 2178 to this processor. This is a square mxm matrix. The remaining portion 2179 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2180 2181 If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored. 2182 2183 By default, this format uses inodes (identical nodes) when possible. 2184 We search for consecutive rows with the same nonzero structure, thereby 2185 reusing matrix information to achieve increased efficiency. 2186 2187 Options Database Keys: 2188 + -mat_aij_no_inode - Do not use inodes 2189 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2190 - -mat_aij_oneindex - Internally use indexing starting at 1 2191 rather than 0. Note that when calling MatSetValues(), 2192 the user still MUST index entries starting at 0! 2193 2194 2195 Example usage: 2196 2197 Consider the following 8x8 matrix with 34 non-zero values, that is 2198 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2199 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2200 as follows: 2201 2202 .vb 2203 1 2 0 | 0 3 0 | 0 4 2204 Proc0 0 5 6 | 7 0 0 | 8 0 2205 9 0 10 | 11 0 0 | 12 0 2206 ------------------------------------- 2207 13 0 14 | 15 16 17 | 0 0 2208 Proc1 0 18 0 | 19 20 21 | 0 0 2209 0 0 0 | 22 23 0 | 24 0 2210 ------------------------------------- 2211 Proc2 25 26 27 | 0 0 28 | 29 0 2212 30 0 0 | 31 32 33 | 0 34 2213 .ve 2214 2215 This can be represented as a collection of submatrices as: 2216 2217 .vb 2218 A B C 2219 D E F 2220 G H I 2221 .ve 2222 2223 Where the submatrices A,B,C are owned by proc0, D,E,F are 2224 owned by proc1, and G,H,I are owned by proc2. 2225 2226 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2227 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2228 The 'M','N' parameters are 8,8, and have the same values on all procs. 2229 2230 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2231 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2232 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2233 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2234 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2235 matrix and [DF] as another SeqAIJ matrix. 2236 2237 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2238 allocated for every row of the local diagonal submatrix, and o_nz 2239 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2240 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2241 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2242 In this case, the values of d_nz,o_nz are: 2243 .vb 2244 proc0 : d_nz = 2, o_nz = 2 2245 proc1 : d_nz = 3, o_nz = 2 2246 proc2 : d_nz = 1, o_nz = 4 2247 .ve 2248 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2249 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2250 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2251 34 values. 2252 2253 When the d_nnz, o_nnz parameters are specified, the storage is specified 2254 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2255 In the above case the values for d_nnz,o_nnz are: 2256 .vb 2257 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2258 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2259 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2260 .ve 2261 Here the space allocated is the sum of all the above values, i.e., 34, and 2262 hence the preallocation is exact. 2263 2264 Level: intermediate 2265 2266 .keywords: matrix, aij, compressed row, sparse, parallel 2267 2268 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2269 @*/ 2270 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2271 { 2272 int ierr,size; 2273 2274 PetscFunctionBegin; 2275 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2276 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2277 if (size > 1) { 2278 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2279 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2280 } else { 2281 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2282 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2283 } 2284 PetscFunctionReturn(0); 2285 } 2286 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2289 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2290 { 2291 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2292 PetscFunctionBegin; 2293 *Ad = a->A; 2294 *Ao = a->B; 2295 *colmap = a->garray; 2296 PetscFunctionReturn(0); 2297 } 2298 2299 #undef __FUNCT__ 2300 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2301 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2302 { 2303 int ierr; 2304 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2305 2306 PetscFunctionBegin; 2307 if (coloring->ctype == IS_COLORING_LOCAL) { 2308 int *allcolors,*colors,i; 2309 ISColoring ocoloring; 2310 2311 /* set coloring for diagonal portion */ 2312 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2313 2314 /* set coloring for off-diagonal portion */ 2315 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2316 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2317 for (i=0; i<a->B->n; i++) { 2318 colors[i] = allcolors[a->garray[i]]; 2319 } 2320 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2321 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2322 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2323 ierr =
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2324 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2325 int *colors,i,*larray; 2326 ISColoring ocoloring; 2327 2328 /* set coloring for diagonal portion */ 2329 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2330 for (i=0; i<a->A->n; i++) { 2331 larray[i] = i + a->cstart; 2332 } 2333 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2334 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2335 for (i=0; i<a->A->n; i++) { 2336 colors[i] = coloring->colors[larray[i]]; 2337 } 2338 ierr = PetscFree(larray);CHKERRQ(ierr); 2339 ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2340 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2341 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2342 2343 /* set coloring for off-diagonal portion */ 2344 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2345 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2346 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2347 for (i=0; i<a->B->n; i++) { 2348 colors[i] = coloring->colors[larray[i]]; 2349 } 2350 ierr = PetscFree(larray);CHKERRQ(ierr); 2351 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2352 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2353 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2354 } else { 2355 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2356 } 2357 2358 PetscFunctionReturn(0); 2359 } 2360 2361 #undef __FUNCT__ 2362 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2363 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2364 { 2365 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2366 int ierr; 2367 2368 PetscFunctionBegin; 2369 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2370 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2376 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2377 { 2378 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2379 int ierr; 2380 2381 PetscFunctionBegin; 2382 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2383 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386
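/*
   Illustrative sketch only (not part of the library code above): one way user code
   might pull apart an assembled MPIAIJ matrix with MatMPIAIJGetSeqAIJ(). The matrix
   A, the declaration of ierr, and the surrounding error handling are assumed to
   exist in the caller.

      Mat Ad,Ao;
      int *garray;

      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Here Ad is the local DIAGONAL block and Ao is the local OFF-DIAGONAL block; after
   assembly the columns of Ao are stored in a compressed local numbering, and
   garray[j] gives the global column corresponding to local column j of Ao. These
   are references into the Mat_MPIAIJ data structure and should not be destroyed by
   the caller.
*/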