/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);
EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
EXTERN int MatUseSpooles_MPIAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old
matrix storage */ \ 93 \ 94 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 95 if (!a->singlemalloc) { \ 96 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 97 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 98 } \ 99 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 100 a->singlemalloc = PETSC_TRUE; \ 101 \ 102 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 103 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 104 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 105 a->maxnz += CHUNKSIZE; \ 106 a->reallocs++; \ 107 } \ 108 N = nrow++ - 1; a->nz++; \ 109 /* shift up all the later entries in this row */ \ 110 for (ii=N; ii>=_i; ii--) { \ 111 rp[ii+1] = rp[ii]; \ 112 ap[ii+1] = ap[ii]; \ 113 } \ 114 rp[_i] = col1; \ 115 ap[_i] = value; \ 116 a_noinsert: ; \ 117 ailen[row] = nrow; \ 118 } 119 120 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 121 { \ 122 \ 123 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 124 rmax = bimax[row]; nrow = bilen[row]; \ 125 col1 = col - shift; \ 126 \ 127 low = 0; high = nrow; \ 128 while (high-low > 5) { \ 129 t = (low+high)/2; \ 130 if (rp[t] > col) high = t; \ 131 else low = t; \ 132 } \ 133 for (_i=low; _i<high; _i++) { \ 134 if (rp[_i] > col1) break; \ 135 if (rp[_i] == col1) { \ 136 if (addv == ADD_VALUES) ap[_i] += value; \ 137 else ap[_i] = value; \ 138 goto b_noinsert; \ 139 } \ 140 } \ 141 if (nonew == 1) goto b_noinsert; \ 142 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 143 if (nrow >= rmax) { \ 144 /* there is no extra room in row, therefore enlarge */ \ 145 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 146 PetscScalar *new_a; \ 147 \ 148 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 149 \ 150 /* malloc new storage space */ \ 151 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 152 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 153 new_j = (int*)(new_a + new_nz); \ 154 new_i = new_j + new_nz; \ 155 \ 156 /* copy over old data into new slots */ \ 157 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 158 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 159 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 160 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 161 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 162 len*sizeof(int));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 165 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 166 /* free up old matrix storage */ \ 167 \ 168 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 169 if (!b->singlemalloc) { \ 170 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 171 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 172 } \ 173 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 174 b->singlemalloc = PETSC_TRUE; \ 175 \ 176 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 177 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 178 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 179 b->maxnz += CHUNKSIZE; \ 180 b->reallocs++; \ 181 } \ 182 N = nrow++ - 1; b->nz++; \ 183 /* shift up all the later entries in this row */ \ 184 for (ii=N; ii>=_i; ii--) { \ 185 rp[ii+1] = rp[ii]; \ 186 ap[ii+1] = ap[ii]; \ 187 } \ 188 rp[_i] = col1; \ 189 ap[_i] = value; \ 190 b_noinsert: ; \ 191 bilen[row] = nrow; \ 192 } 193 194 #undef __FUNCT__ 195 
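/*
   MatSetValues_MPIAIJ() below routes each entry by its global indices: entries in
   locally owned rows whose column lies in [cstart,cend) go into the "diagonal"
   block aij->A, other entries in locally owned rows go into the off-diagonal
   block aij->B, and entries for rows owned by another process are stashed (unless
   MAT_IGNORE_OFF_PROC_ENTRIES was set) and exchanged during assembly.

   A minimal usage sketch, not part of the original source; the matrix A and the
   row/col variables here are illustrative assumptions only:

      PetscScalar v = 1.0;
      int         row = ...,col = ...;     any global indices, owned locally or not
      ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/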
#define __FUNCT__ "MatSetValues_MPIAIJ" 196 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 197 { 198 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 199 PetscScalar value; 200 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 201 int cstart = aij->cstart,cend = aij->cend,row,col; 202 PetscTruth roworiented = aij->roworiented; 203 204 /* Some Variables required in the macro */ 205 Mat A = aij->A; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 207 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 208 PetscScalar *aa = a->a; 209 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 210 Mat B = aij->B; 211 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 212 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 213 PetscScalar *ba = b->a; 214 215 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 216 int nonew = a->nonew,shift = a->indexshift; 217 PetscScalar *ap; 218 219 PetscFunctionBegin; 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_BOPT_g) 223 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 224 #endif 225 if (im[i] >= rstart && im[i] < rend) { 226 row = im[i] - rstart; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart && in[j] < cend){ 229 col = in[j] - cstart; 230 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 231 if (ignorezeroentries && value == 0.0) continue; 232 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 233 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 234 } else if (in[j] < 0) continue; 235 #if defined(PETSC_USE_BOPT_g) 236 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 237 #endif 238 else { 239 if (mat->was_assembled) { 240 if (!aij->colmap) { 241 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 242 } 243 #if defined (PETSC_USE_CTABLE) 244 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 245 col--; 246 #else 247 col = aij->colmap[in[j]] - 1; 248 #endif 249 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 250 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 251 col = in[j]; 252 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 253 B = aij->B; 254 b = (Mat_SeqAIJ*)B->data; 255 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 256 ba = b->a; 257 } 258 } else col = in[j]; 259 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 260 if (ignorezeroentries && value == 0.0) continue; 261 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } 264 } 265 } else { 266 if (!aij->donotstash) { 267 if (roworiented) { 268 if (ignorezeroentries && v[i*n] == 0.0) continue; 269 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 270 } else { 271 if (ignorezeroentries && v[i] == 0.0) continue; 272 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 273 } 274 } 275 } 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatGetValues_MPIAIJ" 282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 283 { 284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 285 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 286 int cstart = aij->cstart,cend = aij->cend,row,col; 287 288 PetscFunctionBegin; 289 for (i=0; i<m; i++) { 290 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 291 if 
(idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 292 if (idxm[i] >= rstart && idxm[i] < rend) { 293 row = idxm[i] - rstart; 294 for (j=0; j<n; j++) { 295 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 296 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 297 if (idxn[j] >= cstart && idxn[j] < cend){ 298 col = idxn[j] - cstart; 299 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 300 } else { 301 if (!aij->colmap) { 302 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 303 } 304 #if defined (PETSC_USE_CTABLE) 305 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 306 col --; 307 #else 308 col = aij->colmap[idxn[j]] - 1; 309 #endif 310 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311 else { 312 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 313 } 314 } 315 } 316 } else { 317 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 318 } 319 } 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 326 { 327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 328 int ierr,nstash,reallocs; 329 InsertMode addv; 330 331 PetscFunctionBegin; 332 if (aij->donotstash) { 333 PetscFunctionReturn(0); 334 } 335 336 /* make sure all processors are either in INSERTMODE or ADDMODE */ 337 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 338 if (addv == (ADD_VALUES|INSERT_VALUES)) { 339 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 340 } 341 mat->insertmode = addv; /* in case this processor had no cache */ 342 343 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 344 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 345 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 346 PetscFunctionReturn(0); 347 } 348 349 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 353 { 354 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 355 int i,j,rstart,ncols,n,ierr,flg; 356 int *row,*col,other_disassembled; 357 PetscScalar *val; 358 InsertMode addv = mat->insertmode; 359 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) 360 PetscTruth flag; 361 #endif 362 363 PetscFunctionBegin; 364 if (!aij->donotstash) { 365 while (1) { 366 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 367 if (!flg) break; 368 369 for (i=0; i<n;) { 370 /* Now identify the consecutive vals belonging to the same row */ 371 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 372 if (j < n) ncols = j-i; 373 else ncols = n-i; 374 /* Now assemble all these values with a single function call */ 375 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 376 i = j; 377 } 378 } 379 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 380 } 381 382 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 383 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 384 385 /* determine if any processor has disassembled, if so we must 386 also disassemble ourselfs, in order that we may reassemble. 
*/ 387 /* 388 if nonzero structure of submatrix B cannot change then we know that 389 no processor disassembled thus we can skip this stuff 390 */ 391 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 392 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 393 if (mat->was_assembled && !other_disassembled) { 394 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 } 397 398 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 399 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 400 } 401 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 402 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 403 404 if (aij->rowvalues) { 405 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 406 aij->rowvalues = 0; 407 } 408 #if defined(PETSC_HAVE_SUPERLUDIST) 409 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 410 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 411 #endif 412 413 #if defined(PETSC_HAVE_SPOOLES) 414 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 415 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 416 #endif 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 422 int MatZeroEntries_MPIAIJ(Mat A) 423 { 424 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 425 int ierr; 426 427 PetscFunctionBegin; 428 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 429 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatZeroRows_MPIAIJ" 435 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 436 { 437 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 438 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 439 int *procs,*nprocs,j,idx,nsends,*work,row; 440 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 441 int *rvalues,tag = A->tag,count,base,slen,n,*source; 442 int *lens,imdex,*lrows,*values,rstart=l->rstart; 443 MPI_Comm comm = A->comm; 444 MPI_Request *send_waits,*recv_waits; 445 MPI_Status recv_status,*send_status; 446 IS istmp; 447 PetscTruth found; 448 449 PetscFunctionBegin; 450 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 451 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 452 453 /* first count number of contributors to each processor */ 454 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 455 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 456 procs = nprocs + size; 457 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 458 for (i=0; i<N; i++) { 459 idx = rows[i]; 460 found = PETSC_FALSE; 461 for (j=0; j<size; j++) { 462 if (idx >= owners[j] && idx < owners[j+1]) { 463 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 464 } 465 } 466 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 467 } 468 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 469 470 /* inform other processors of number of messages and max length*/ 471 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 472 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 473 nrecvs = work[size+rank]; 474 nmax = work[rank]; 475 ierr = PetscFree(work);CHKERRQ(ierr); 476 477 /* post receives: */ 478 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 479 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 480 for (i=0; i<nrecvs; i++) { 481 ierr = 
MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
      the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is perhaps not always true.
*/ 636 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 642 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 643 { 644 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 645 int ierr; 646 647 PetscFunctionBegin; 648 /* do nondiagonal part */ 649 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 650 /* send it on its way */ 651 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 652 /* do local part */ 653 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 654 /* receive remote parts: note this assumes the values are not actually */ 655 /* inserted in yy until the next line, which is true for my implementation*/ 656 /* but is not perhaps always true. */ 657 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 /* 662 This only works correctly for square matrices where the subblock A->A is the 663 diagonal block 664 */ 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 667 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 668 { 669 int ierr; 670 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 671 672 PetscFunctionBegin; 673 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 674 if (a->rstart != a->cstart || a->rend != a->cend) { 675 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 676 } 677 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatScale_MPIAIJ" 683 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 684 { 685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 686 int ierr; 687 688 PetscFunctionBegin; 689 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 690 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatDestroy_MPIAIJ" 696 int MatDestroy_MPIAIJ(Mat mat) 697 { 698 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 699 int ierr; 700 701 PetscFunctionBegin; 702 #if defined(PETSC_USE_LOG) 703 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 704 #endif 705 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 706 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 707 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 708 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 709 #if defined (PETSC_USE_CTABLE) 710 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 711 #else 712 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 713 #endif 714 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 715 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 716 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 717 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 718 ierr = PetscFree(aij);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 723 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 726 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 729 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 730 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 731 PetscTruth isdraw,isascii,flg; 732 PetscViewer sviewer; 733 PetscViewerFormat format; 734 735 PetscFunctionBegin; 736 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 737 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 738 if (isascii) { 739 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 740 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 741 MatInfo info; 742 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 743 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 744 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 745 if (flg) { 746 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 747 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 748 } else { 749 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 750 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 751 } 752 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 753 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 754 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 755 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 756 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 757 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 758 PetscFunctionReturn(0); 759 } else if (format == PETSC_VIEWER_ASCII_INFO) { 760 PetscFunctionReturn(0); 761 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 762 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 763 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 764 #endif 765 PetscFunctionReturn(0); 766 } 767 } else if (isdraw) { 768 PetscDraw draw; 769 PetscTruth isnull; 770 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 771 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 772 } 773 774 if (size == 1) { 775 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 776 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 777 } else { 778 /* assemble the entire matrix onto first processor. 
*/ 779 Mat A; 780 Mat_SeqAIJ *Aloc; 781 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 782 PetscScalar *a; 783 784 if (!rank) { 785 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 786 } else { 787 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 788 } 789 PetscLogObjectParent(mat,A); 790 791 /* copy over the A part */ 792 Aloc = (Mat_SeqAIJ*)aij->A->data; 793 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 794 row = aij->rstart; 795 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 796 for (i=0; i<m; i++) { 797 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 798 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 799 } 800 aj = Aloc->j; 801 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 802 803 /* copy over the B part */ 804 Aloc = (Mat_SeqAIJ*)aij->B->data; 805 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 806 row = aij->rstart; 807 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 808 ct = cols; 809 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 810 for (i=0; i<m; i++) { 811 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 812 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 813 } 814 ierr = PetscFree(ct);CHKERRQ(ierr); 815 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 816 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 817 /* 818 Everyone has to call to draw the matrix since the graphics waits are 819 synchronized across all processors that share the PetscDraw object 820 */ 821 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 822 if (!rank) { 823 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 824 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 825 } 826 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 827 ierr = MatDestroy(A);CHKERRQ(ierr); 828 } 829 PetscFunctionReturn(0); 830 } 831 832 #undef __FUNCT__ 833 #define __FUNCT__ "MatView_MPIAIJ" 834 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 835 { 836 int ierr; 837 PetscTruth isascii,isdraw,issocket,isbinary; 838 839 PetscFunctionBegin; 840 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 841 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 842 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 843 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 844 if (isascii || isdraw || isbinary || issocket) { 845 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 846 } else { 847 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 848 } 849 PetscFunctionReturn(0); 850 } 851 852 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatRelax_MPIAIJ" 856 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 857 { 858 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 859 int ierr; 860 Vec bb1; 861 PetscScalar mone=-1.0; 862 863 PetscFunctionBegin; 864 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 865 866 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 867 868 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 869 if (flag & SOR_ZERO_INITIAL_GUESS) { 870 
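      /* with a zero initial guess the off-process contribution B*x is zero, so the
         first iteration can relax directly on bb; each remaining iteration below
         scatters the current x into lvec and rebuilds the local right-hand side
         bb1 = bb - B*x before doing a purely local (on-process) sweep */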
ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 871 its--; 872 } 873 874 while (its--) { 875 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 876 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 877 878 /* update rhs: bb1 = bb - B*x */ 879 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 880 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 881 882 /* local sweep */ 883 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 884 CHKERRQ(ierr); 885 } 886 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 887 if (flag & SOR_ZERO_INITIAL_GUESS) { 888 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 889 its--; 890 } 891 while (its--) { 892 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 893 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 894 895 /* update rhs: bb1 = bb - B*x */ 896 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 897 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 898 899 /* local sweep */ 900 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 901 CHKERRQ(ierr); 902 } 903 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 904 if (flag & SOR_ZERO_INITIAL_GUESS) { 905 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 906 its--; 907 } 908 while (its--) { 909 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 910 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 911 912 /* update rhs: bb1 = bb - B*x */ 913 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 914 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 915 916 /* local sweep */ 917 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 918 CHKERRQ(ierr); 919 } 920 } else { 921 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 922 } 923 924 ierr = VecDestroy(bb1);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatGetInfo_MPIAIJ" 930 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 931 { 932 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 933 Mat A = mat->A,B = mat->B; 934 int ierr; 935 PetscReal isend[5],irecv[5]; 936 937 PetscFunctionBegin; 938 info->block_size = 1.0; 939 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 940 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 941 isend[3] = info->memory; isend[4] = info->mallocs; 942 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 943 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 944 isend[3] += info->memory; isend[4] += info->mallocs; 945 if (flag == MAT_LOCAL) { 946 info->nz_used = isend[0]; 947 info->nz_allocated = isend[1]; 948 info->nz_unneeded = isend[2]; 949 info->memory = isend[3]; 950 info->mallocs = isend[4]; 951 } else if (flag == MAT_GLOBAL_MAX) { 952 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 953 info->nz_used = irecv[0]; 954 info->nz_allocated = irecv[1]; 955 info->nz_unneeded = irecv[2]; 956 info->memory = irecv[3]; 957 info->mallocs = irecv[4]; 958 } else if (flag == MAT_GLOBAL_SUM) { 959 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 960 info->nz_used = irecv[0]; 961 info->nz_allocated = irecv[1]; 962 info->nz_unneeded = irecv[2]; 963 info->memory = irecv[3]; 964 info->mallocs = irecv[4]; 965 } 966 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 967 info->fill_ratio_needed = 0; 968 info->factor_mallocs = 0; 969 info->rows_global = (double)matin->M; 970 info->columns_global = (double)matin->N; 971 info->rows_local = (double)matin->m; 972 info->columns_local = (double)matin->N; 973 974 PetscFunctionReturn(0); 975 } 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatSetOption_MPIAIJ" 979 int MatSetOption_MPIAIJ(Mat A,MatOption op) 980 { 981 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 982 int ierr; 983 984 PetscFunctionBegin; 985 switch (op) { 986 case MAT_NO_NEW_NONZERO_LOCATIONS: 987 case MAT_YES_NEW_NONZERO_LOCATIONS: 988 case MAT_COLUMNS_UNSORTED: 989 case MAT_COLUMNS_SORTED: 990 case MAT_NEW_NONZERO_ALLOCATION_ERR: 991 case MAT_KEEP_ZEROED_ROWS: 992 case MAT_NEW_NONZERO_LOCATION_ERR: 993 case MAT_USE_INODES: 994 case MAT_DO_NOT_USE_INODES: 995 case MAT_IGNORE_ZERO_ENTRIES: 996 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 997 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 998 break; 999 case MAT_ROW_ORIENTED: 1000 a->roworiented = PETSC_TRUE; 1001 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1002 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1003 break; 1004 case MAT_ROWS_SORTED: 1005 case MAT_ROWS_UNSORTED: 1006 case MAT_YES_NEW_DIAGONALS: 1007 case MAT_USE_SINGLE_PRECISION_SOLVES: 1008 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1009 break; 1010 case MAT_COLUMN_ORIENTED: 1011 a->roworiented = PETSC_FALSE; 1012 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1013 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1014 break; 1015 case MAT_IGNORE_OFF_PROC_ENTRIES: 1016 a->donotstash = PETSC_TRUE; 1017 break; 1018 case MAT_NO_NEW_DIAGONALS: 1019 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1020 default: 1021 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1022 } 1023 PetscFunctionReturn(0); 1024 } 1025 1026 #undef __FUNCT__ 1027 #define __FUNCT__ "MatGetRow_MPIAIJ" 1028 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1029 { 1030 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1031 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1032 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1033 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1034 int *cmap,*idx_p; 1035 1036 PetscFunctionBegin; 1037 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1038 mat->getrowactive = PETSC_TRUE; 1039 1040 if (!mat->rowvalues && (idx || v)) { 1041 /* 1042 allocate enough space to hold information from the longest row. 
1043 */ 1044 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1045 int max = 1,tmp; 1046 for (i=0; i<matin->m; i++) { 1047 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1048 if (max < tmp) { max = tmp; } 1049 } 1050 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1051 mat->rowindices = (int*)(mat->rowvalues + max); 1052 } 1053 1054 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1055 lrow = row - rstart; 1056 1057 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1058 if (!v) {pvA = 0; pvB = 0;} 1059 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1060 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1061 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1062 nztot = nzA + nzB; 1063 1064 cmap = mat->garray; 1065 if (v || idx) { 1066 if (nztot) { 1067 /* Sort by increasing column numbers, assuming A and B already sorted */ 1068 int imark = -1; 1069 if (v) { 1070 *v = v_p = mat->rowvalues; 1071 for (i=0; i<nzB; i++) { 1072 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1073 else break; 1074 } 1075 imark = i; 1076 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1077 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1078 } 1079 if (idx) { 1080 *idx = idx_p = mat->rowindices; 1081 if (imark > -1) { 1082 for (i=0; i<imark; i++) { 1083 idx_p[i] = cmap[cworkB[i]]; 1084 } 1085 } else { 1086 for (i=0; i<nzB; i++) { 1087 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1088 else break; 1089 } 1090 imark = i; 1091 } 1092 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1093 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1094 } 1095 } else { 1096 if (idx) *idx = 0; 1097 if (v) *v = 0; 1098 } 1099 } 1100 *nz = nztot; 1101 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1102 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1108 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1109 { 1110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1111 1112 PetscFunctionBegin; 1113 if (aij->getrowactive == PETSC_FALSE) { 1114 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1115 } 1116 aij->getrowactive = PETSC_FALSE; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatNorm_MPIAIJ" 1122 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1123 { 1124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1125 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1126 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1127 PetscReal sum = 0.0; 1128 PetscScalar *v; 1129 1130 PetscFunctionBegin; 1131 if (aij->size == 1) { 1132 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1133 } else { 1134 if (type == NORM_FROBENIUS) { 1135 v = amat->a; 1136 for (i=0; i<amat->nz; i++) { 1137 #if defined(PETSC_USE_COMPLEX) 1138 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1139 #else 1140 sum += (*v)*(*v); v++; 1141 #endif 1142 } 1143 v = bmat->a; 1144 for (i=0; i<bmat->nz; i++) { 1145 #if defined(PETSC_USE_COMPLEX) 1146 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1147 #else 1148 sum += (*v)*(*v); v++; 1149 #endif 1150 } 1151 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1152 *norm = sqrt(*norm); 1153 } else if (type == NORM_1) { /* max column norm */ 1154 
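      /* the 1-norm is the maximum column sum: |a_ij| is accumulated into a
         work array of global length indexed by global column (garray maps
         local off-diagonal columns to global columns), the arrays are summed
         across all processes, and the largest entry is taken */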
PetscReal *tmp,*tmp2; 1155 int *jj,*garray = aij->garray; 1156 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1157 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1158 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1159 *norm = 0.0; 1160 v = amat->a; jj = amat->j; 1161 for (j=0; j<amat->nz; j++) { 1162 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1163 } 1164 v = bmat->a; jj = bmat->j; 1165 for (j=0; j<bmat->nz; j++) { 1166 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1167 } 1168 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1169 for (j=0; j<mat->N; j++) { 1170 if (tmp2[j] > *norm) *norm = tmp2[j]; 1171 } 1172 ierr = PetscFree(tmp);CHKERRQ(ierr); 1173 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1174 } else if (type == NORM_INFINITY) { /* max row norm */ 1175 PetscReal ntemp = 0.0; 1176 for (j=0; j<aij->A->m; j++) { 1177 v = amat->a + amat->i[j] + shift; 1178 sum = 0.0; 1179 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1180 sum += PetscAbsScalar(*v); v++; 1181 } 1182 v = bmat->a + bmat->i[j] + shift; 1183 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1184 sum += PetscAbsScalar(*v); v++; 1185 } 1186 if (sum > ntemp) ntemp = sum; 1187 } 1188 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1189 } else { 1190 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1191 } 1192 } 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatTranspose_MPIAIJ" 1198 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1199 { 1200 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1201 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1202 int ierr,shift = Aloc->indexshift; 1203 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1204 Mat B; 1205 PetscScalar *array; 1206 1207 PetscFunctionBegin; 1208 if (!matout && M != N) { 1209 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1210 } 1211 1212 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1213 1214 /* copy over the A part */ 1215 Aloc = (Mat_SeqAIJ*)a->A->data; 1216 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1217 row = a->rstart; 1218 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1219 for (i=0; i<m; i++) { 1220 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1221 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1222 } 1223 aj = Aloc->j; 1224 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1225 1226 /* copy over the B part */ 1227 Aloc = (Mat_SeqAIJ*)a->B->data; 1228 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1229 row = a->rstart; 1230 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1231 ct = cols; 1232 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1233 for (i=0; i<m; i++) { 1234 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1235 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1236 } 1237 ierr = PetscFree(ct);CHKERRQ(ierr); 1238 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1239 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1240 if (matout) { 1241 *matout = B; 1242 } else { 1243 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1244 } 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1250 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1251 { 1252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1253 Mat a = aij->A,b 
= aij->B; 1254 int ierr,s1,s2,s3; 1255 1256 PetscFunctionBegin; 1257 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1258 if (rr) { 1259 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1260 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1261 /* Overlap communication with computation. */ 1262 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1263 } 1264 if (ll) { 1265 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1266 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1267 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1268 } 1269 /* scale the diagonal block */ 1270 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1271 1272 if (rr) { 1273 /* Do a scatter end and then right scale the off-diagonal block */ 1274 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1275 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1276 } 1277 1278 PetscFunctionReturn(0); 1279 } 1280 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1284 int MatPrintHelp_MPIAIJ(Mat A) 1285 { 1286 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1287 int ierr; 1288 1289 PetscFunctionBegin; 1290 if (!a->rank) { 1291 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1292 } 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1298 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1299 { 1300 PetscFunctionBegin; 1301 *bs = 1; 1302 PetscFunctionReturn(0); 1303 } 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1306 int MatSetUnfactored_MPIAIJ(Mat A) 1307 { 1308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1309 int ierr; 1310 1311 PetscFunctionBegin; 1312 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatEqual_MPIAIJ" 1318 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1319 { 1320 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1321 Mat a,b,c,d; 1322 PetscTruth flg; 1323 int ierr; 1324 1325 PetscFunctionBegin; 1326 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1327 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1328 a = matA->A; b = matA->B; 1329 c = matB->A; d = matB->B; 1330 1331 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1332 if (flg == PETSC_TRUE) { 1333 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1334 } 1335 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatCopy_MPIAIJ" 1341 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1342 { 1343 int ierr; 1344 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1345 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1346 PetscTruth flg; 1347 1348 PetscFunctionBegin; 1349 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1350 if (str != SAME_NONZERO_PATTERN || !flg) { 1351 /* because of the column compression in the off-processor part of the matrix a->B, 1352 the number of columns in a->B and b->B may be different, hence we cannot call 1353 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1354 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1355 then copying the submatrices */ 1356 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1357 } else { 1358 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1359 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1360 } 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1366 int MatSetUpPreallocation_MPIAIJ(Mat A) 1367 { 1368 int ierr; 1369 1370 PetscFunctionBegin; 1371 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1376 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1377 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1378 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1379 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1380 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1381 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1382 #endif 1383 1384 #include "petscblaslapack.h" 1385 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatAXPY_MPIAIJ" 1388 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1389 { 1390 int ierr,one; 1391 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1392 Mat_SeqAIJ *x,*y; 1393 1394 PetscFunctionBegin; 1395 if (str == SAME_NONZERO_PATTERN) { 1396 x = (Mat_SeqAIJ *)xx->A->data; 1397 y = (Mat_SeqAIJ *)yy->A->data; 1398 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1399 x = (Mat_SeqAIJ *)xx->B->data; 1400 y = (Mat_SeqAIJ *)yy->B->data; 1401 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1402 } else { 1403 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /* -------------------------------------------------------------------*/ 1409 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1410 MatGetRow_MPIAIJ, 1411 MatRestoreRow_MPIAIJ, 1412 MatMult_MPIAIJ, 1413 MatMultAdd_MPIAIJ, 1414 MatMultTranspose_MPIAIJ, 1415 MatMultTransposeAdd_MPIAIJ, 1416 0, 1417 0, 1418 0, 1419 0, 1420 0, 1421 0, 1422 MatRelax_MPIAIJ, 1423 MatTranspose_MPIAIJ, 1424 MatGetInfo_MPIAIJ, 1425 MatEqual_MPIAIJ, 1426 MatGetDiagonal_MPIAIJ, 1427 MatDiagonalScale_MPIAIJ, 1428 MatNorm_MPIAIJ, 1429 MatAssemblyBegin_MPIAIJ, 1430 MatAssemblyEnd_MPIAIJ, 1431 0, 1432 MatSetOption_MPIAIJ, 1433 MatZeroEntries_MPIAIJ, 1434 MatZeroRows_MPIAIJ, 1435 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1436 MatLUFactorSymbolic_MPIAIJ_TFS, 1437 #else 1438 0, 1439 #endif 1440 0, 1441 0, 1442 0, 1443 MatSetUpPreallocation_MPIAIJ, 1444 0, 1445 0, 1446 0, 1447 0, 1448 MatDuplicate_MPIAIJ, 1449 0, 1450 0, 1451 0, 1452 0, 1453 MatAXPY_MPIAIJ, 1454 MatGetSubMatrices_MPIAIJ, 1455 MatIncreaseOverlap_MPIAIJ, 1456 MatGetValues_MPIAIJ, 1457 MatCopy_MPIAIJ, 1458 MatPrintHelp_MPIAIJ, 1459 MatScale_MPIAIJ, 1460 0, 1461 0, 1462 0, 1463 MatGetBlockSize_MPIAIJ, 1464 0, 1465 0, 1466 0, 1467 0, 1468 MatFDColoringCreate_MPIAIJ, 1469 0, 1470 MatSetUnfactored_MPIAIJ, 1471 0, 1472 0, 1473 MatGetSubMatrix_MPIAIJ, 1474 MatDestroy_MPIAIJ, 1475 MatView_MPIAIJ, 1476 MatGetPetscMaps_Petsc, 1477 0, 1478 0, 1479 0, 1480 0, 1481 0, 1482 0, 1483 0, 1484 0, 1485 MatSetColoring_MPIAIJ, 1486 MatSetValuesAdic_MPIAIJ, 1487 MatSetValuesAdifor_MPIAIJ 1488 }; 1489 1490 /* 
----------------------------------------------------------------------------------------*/ 1491 1492 EXTERN_C_BEGIN 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1495 int MatStoreValues_MPIAIJ(Mat mat) 1496 { 1497 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1498 int ierr; 1499 1500 PetscFunctionBegin; 1501 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1502 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 EXTERN_C_END 1506 1507 EXTERN_C_BEGIN 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1510 int MatRetrieveValues_MPIAIJ(Mat mat) 1511 { 1512 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1513 int ierr; 1514 1515 PetscFunctionBegin; 1516 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1517 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1518 PetscFunctionReturn(0); 1519 } 1520 EXTERN_C_END 1521 1522 #include "petscpc.h" 1523 EXTERN_C_BEGIN 1524 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1525 EXTERN_C_END 1526 1527 EXTERN_C_BEGIN 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatCreate_MPIAIJ" 1530 int MatCreate_MPIAIJ(Mat B) 1531 { 1532 Mat_MPIAIJ *b; 1533 int ierr,i,size; 1534 #if defined(PETSC_HAVE_SUPERLUDIST) 1535 PetscTruth flg; 1536 #endif 1537 1538 1539 PetscFunctionBegin; 1540 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1541 1542 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1543 B->data = (void*)b; 1544 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1545 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1546 B->factor = 0; 1547 B->assembled = PETSC_FALSE; 1548 B->mapping = 0; 1549 1550 B->insertmode = NOT_SET_VALUES; 1551 b->size = size; 1552 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1553 1554 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1555 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1556 1557 /* the information in the maps duplicates the information computed below, eventually 1558 we should remove the duplicate information that is not contained in the maps */ 1559 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1560 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1561 1562 /* build local table of row and column ownerships */ 1563 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1564 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1565 b->cowners = b->rowners + b->size + 2; 1566 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1567 b->rowners[0] = 0; 1568 for (i=2; i<=b->size; i++) { 1569 b->rowners[i] += b->rowners[i-1]; 1570 } 1571 b->rstart = b->rowners[b->rank]; 1572 b->rend = b->rowners[b->rank+1]; 1573 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1574 b->cowners[0] = 0; 1575 for (i=2; i<=b->size; i++) { 1576 b->cowners[i] += b->cowners[i-1]; 1577 } 1578 b->cstart = b->cowners[b->rank]; 1579 b->cend = b->cowners[b->rank+1]; 1580 1581 /* build cache for off array entries formed */ 1582 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1583 b->donotstash = PETSC_FALSE; 1584 b->colmap = 0; 1585 b->garray = 0; 1586 b->roworiented = PETSC_TRUE; 1587 1588 /* stuff used for matrix vector multiply */ 1589 b->lvec = PETSC_NULL; 1590 b->Mvctx = PETSC_NULL; 1591 1592 /* stuff for MatGetRow() */ 1593 b->rowindices = 0; 1594 b->rowvalues = 0; 1595 b->getrowactive = PETSC_FALSE; 1596 /* 1597 #if 
defined(PETSC_HAVE_SUPERLUDIST) 1598 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr); 1599 if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); } 1600 #endif 1601 */ 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1603 "MatStoreValues_MPIAIJ", 1604 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1606 "MatRetrieveValues_MPIAIJ", 1607 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1609 "MatGetDiagonalBlock_MPIAIJ", 1610 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1611 1612 PetscFunctionReturn(0); 1613 } 1614 EXTERN_C_END 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1618 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1619 { 1620 Mat mat; 1621 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1622 int ierr; 1623 1624 PetscFunctionBegin; 1625 *newmat = 0; 1626 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1627 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1628 a = (Mat_MPIAIJ*)mat->data; 1629 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1630 mat->factor = matin->factor; 1631 mat->assembled = PETSC_TRUE; 1632 mat->insertmode = NOT_SET_VALUES; 1633 mat->preallocated = PETSC_TRUE; 1634 1635 a->rstart = oldmat->rstart; 1636 a->rend = oldmat->rend; 1637 a->cstart = oldmat->cstart; 1638 a->cend = oldmat->cend; 1639 a->size = oldmat->size; 1640 a->rank = oldmat->rank; 1641 a->donotstash = oldmat->donotstash; 1642 a->roworiented = oldmat->roworiented; 1643 a->rowindices = 0; 1644 a->rowvalues = 0; 1645 a->getrowactive = PETSC_FALSE; 1646 1647 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1648 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1649 if (oldmat->colmap) { 1650 #if defined (PETSC_USE_CTABLE) 1651 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1652 #else 1653 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1654 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1655 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1656 #endif 1657 } else a->colmap = 0; 1658 if (oldmat->garray) { 1659 int len; 1660 len = oldmat->B->n; 1661 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1662 PetscLogObjectMemory(mat,len*sizeof(int)); 1663 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1664 } else a->garray = 0; 1665 1666 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1667 PetscLogObjectParent(mat,a->lvec); 1668 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1669 PetscLogObjectParent(mat,a->Mvctx); 1670 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1671 PetscLogObjectParent(mat,a->A); 1672 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1673 PetscLogObjectParent(mat,a->B); 1674 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1675 *newmat = mat; 1676 PetscFunctionReturn(0); 1677 } 1678 1679 #include "petscsys.h" 1680 1681 EXTERN_C_BEGIN 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatLoad_MPIAIJ" 1684 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1685 { 1686 Mat A; 1687 PetscScalar *vals,*svals; 1688 MPI_Comm comm = ((PetscObject)viewer)->comm; 1689 MPI_Status status; 1690 
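  /* the binary file read below is assumed to be laid out as a four-integer header
     (cookie, M, N, and a nonzero count that is negative for special on-disk
     formats this loader rejects), followed by the M row lengths, then all column
     indices, then all numerical values; process 0 reads the file and ships each
     process its portion with MPI_Send/MPI_Recv */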
#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat         A;
  PetscScalar *vals,*svals;
  MPI_Comm    comm = ((PetscObject)viewer)->comm;
  MPI_Status  status;
  int         i,nz,ierr,j,rstart,rend,fd;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz)
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A    = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
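/*
   Editorial usage sketch (not part of the original source): MatLoad_MPIAIJ() is normally
   invoked through MatLoad() with a binary viewer.  Assuming the 2.1-era binary viewer
   calling sequence (PetscViewerBinaryOpen() with a read-only mode flag, here written as
   PETSC_BINARY_RDONLY), a typical load looks like

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   The file name "matrix.dat" is only illustrative; the file must have been written with
   MatView() on a binary viewer, so that the header, row lengths, column indices, and
   values appear in exactly the order read by the routine above.
*/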
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first each process builds a
  local SeqAIJ copy, and then the local matrices are concatenated to form the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;  jj += nz;
    vwork = aa;  aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
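/*
   Editorial usage sketch (not part of the original source): the routine above is the
   MPIAIJ implementation behind the generic MatGetSubMatrix() interface.  Assuming the
   usual index-set creation routines, a parallel submatrix of an MPIAIJ matrix A can be
   extracted and later refreshed as

      IS  isrow,iscol;
      Mat sub;
      ierr = ISCreateStride(PETSC_COMM_WORLD,nlocalrows,firstrow,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,nlocalcols,firstcol,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ...
      ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);

   nlocalrows, firstrow, nlocalcols, and firstcol are placeholders for the caller's own
   sizes.  The MAT_REUSE_MATRIX path relies on the "SubMatrix" object composed onto the
   result above.
*/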
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
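/*
   Editorial usage sketch (not part of the original source): for the 8x8 example in the
   manual page above, the process that owns the first three rows (proc0) could preallocate
   exactly with

      Mat A;
      int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   The other processes pass their own local sizes and d_nnz/o_nnz arrays (proc1: {3,3,2}
   and {2,1,1}; proc2: {1,1} and {4,4}).  When the nnz arrays are given, the scalar
   d_nz/o_nz arguments (0 here) are ignored, as noted above.
*/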
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given).  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
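/*
   Editorial usage sketch (not part of the original source): MatMPIAIJGetSeqAIJ() above
   exposes the two local SeqAIJ blocks and the off-diagonal column map of an MPIAIJ
   matrix A, e.g.

      Mat Ad,Ao;
      int *garray;
      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Column j of Ao then corresponds to global column garray[j] of A (the same mapping used
   for a->garray in the coloring code below), while Ad uses the local diagonal-block
   column numbering.
*/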
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    int        *colors,i,*larray;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}