/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);
EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
EXTERN int MatUseSpooles_MPIAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is defined this mapping is
   scalable, at a slightly higher hash-table lookup cost; without it, it is
   not scalable (each processor holds an integer array of length N), but
   access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

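/*
   Reader's note (illustrative only, not part of the implementation): the
   parallel matrix is stored as two sequential AIJ matrices per process,
   aij->A for the "diagonal" block (columns owned by this process) and
   aij->B for the "off-diagonal" block.  B's columns are compressed, so two
   maps are kept: garray[lc] gives the global column of B's local column lc,
   and colmap (built above) gives the inverse, colmap[gc] = lc+1 (0 if the
   global column is absent).  The two macros above insert one value into a
   CSR row via a short binary search plus linear scan, growing the row by
   CHUNKSIZE entries when it is full.  A minimal usage sketch from the user's
   side (sizes and values below are arbitrary examples):

     Mat         A;
     int         ierr,row = 0,col = 0;
     PetscScalar v = 1.0;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries destined for rows owned by other processes are cached in the
   stash by MatSetValues_MPIAIJ() below and communicated during assembly;
   MatMult() then computes y = A*x_local + B*x_ghost, overlapping the ghost
   scatter with the local product.
*/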
#define __FUNCT__ "MatSetValues_MPIAIJ" 196 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 197 { 198 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 199 PetscScalar value; 200 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 201 int cstart = aij->cstart,cend = aij->cend,row,col; 202 PetscTruth roworiented = aij->roworiented; 203 204 /* Some Variables required in the macro */ 205 Mat A = aij->A; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 207 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 208 PetscScalar *aa = a->a; 209 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 210 Mat B = aij->B; 211 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 212 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 213 PetscScalar *ba = b->a; 214 215 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 216 int nonew = a->nonew,shift = a->indexshift; 217 PetscScalar *ap; 218 219 PetscFunctionBegin; 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_BOPT_g) 223 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 224 #endif 225 if (im[i] >= rstart && im[i] < rend) { 226 row = im[i] - rstart; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart && in[j] < cend){ 229 col = in[j] - cstart; 230 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 231 if (ignorezeroentries && value == 0.0) continue; 232 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 233 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 234 } else if (in[j] < 0) continue; 235 #if defined(PETSC_USE_BOPT_g) 236 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 237 #endif 238 else { 239 if (mat->was_assembled) { 240 if (!aij->colmap) { 241 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 242 } 243 #if defined (PETSC_USE_CTABLE) 244 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 245 col--; 246 #else 247 col = aij->colmap[in[j]] - 1; 248 #endif 249 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 250 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 251 col = in[j]; 252 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 253 B = aij->B; 254 b = (Mat_SeqAIJ*)B->data; 255 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 256 ba = b->a; 257 } 258 } else col = in[j]; 259 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 260 if (ignorezeroentries && value == 0.0) continue; 261 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } 264 } 265 } else { 266 if (!aij->donotstash) { 267 if (roworiented) { 268 if (ignorezeroentries && v[i*n] == 0.0) continue; 269 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 270 } else { 271 if (ignorezeroentries && v[i] == 0.0) continue; 272 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 273 } 274 } 275 } 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatGetValues_MPIAIJ" 282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 283 { 284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 285 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 286 int cstart = aij->cstart,cend = aij->cend,row,col; 287 288 PetscFunctionBegin; 289 for (i=0; i<m; i++) { 290 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 291 if 
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry
     (which would allocate new memory, calling lots of mallocs and slowing
     things down).

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this implementation
     but may not always be true.
*/ 636 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 642 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 643 { 644 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 645 int ierr; 646 647 PetscFunctionBegin; 648 /* do nondiagonal part */ 649 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 650 /* send it on its way */ 651 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 652 /* do local part */ 653 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 654 /* receive remote parts: note this assumes the values are not actually */ 655 /* inserted in yy until the next line, which is true for my implementation*/ 656 /* but is not perhaps always true. */ 657 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 /* 662 This only works correctly for square matrices where the subblock A->A is the 663 diagonal block 664 */ 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 667 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 668 { 669 int ierr; 670 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 671 672 PetscFunctionBegin; 673 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 674 if (a->rstart != a->cstart || a->rend != a->cend) { 675 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 676 } 677 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatScale_MPIAIJ" 683 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 684 { 685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 686 int ierr; 687 688 PetscFunctionBegin; 689 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 690 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatDestroy_MPIAIJ" 696 int MatDestroy_MPIAIJ(Mat mat) 697 { 698 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 699 int ierr; 700 701 PetscFunctionBegin; 702 #if defined(PETSC_USE_LOG) 703 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 704 #endif 705 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 706 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 707 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 708 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 709 #if defined (PETSC_USE_CTABLE) 710 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 711 #else 712 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 713 #endif 714 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 715 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 716 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 717 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 718 ierr = PetscFree(aij);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 723 extern int MatMPIAIJFactorInfo_Spooles(Mat,PetscViewer); 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 727 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 730 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 731 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 732 PetscTruth isdraw,isascii,flg; 733 PetscViewer sviewer; 734 
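  /*
     Overview (descriptive note only): for ASCII viewers with the
     PETSC_VIEWER_ASCII_INFO_LONG format this routine prints, per process,
     the local row count and the nonzero counts of the diagonal (A) and
     off-diagonal (B) blocks; for draw/socket viewers (and plain ASCII on
     more than one process) the entire matrix is first assembled onto
     process 0 and viewed there through a singleton viewer.
  */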
PetscViewerFormat format; 735 736 PetscFunctionBegin; 737 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 738 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 739 if (isascii) { 740 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 741 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 742 MatInfo info; 743 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 744 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 745 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 746 if (flg) { 747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 748 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 749 } else { 750 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 751 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 752 } 753 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 754 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 755 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 756 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 758 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } else if (format == PETSC_VIEWER_ASCII_INFO) { 761 PetscFunctionReturn(0); 762 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 763 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 764 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 765 #endif 766 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 767 ierr = MatMPIAIJFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 768 #endif 769 PetscFunctionReturn(0); 770 } 771 } else if (isdraw) { 772 PetscDraw draw; 773 PetscTruth isnull; 774 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 775 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 776 } 777 778 if (size == 1) { 779 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 780 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 781 } else { 782 /* assemble the entire matrix onto first processor. 
*/ 783 Mat A; 784 Mat_SeqAIJ *Aloc; 785 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 786 PetscScalar *a; 787 788 if (!rank) { 789 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 790 } else { 791 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 792 } 793 PetscLogObjectParent(mat,A); 794 795 /* copy over the A part */ 796 Aloc = (Mat_SeqAIJ*)aij->A->data; 797 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 798 row = aij->rstart; 799 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 800 for (i=0; i<m; i++) { 801 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 802 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 803 } 804 aj = Aloc->j; 805 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 806 807 /* copy over the B part */ 808 Aloc = (Mat_SeqAIJ*)aij->B->data; 809 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 810 row = aij->rstart; 811 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 812 ct = cols; 813 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 814 for (i=0; i<m; i++) { 815 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 816 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 817 } 818 ierr = PetscFree(ct);CHKERRQ(ierr); 819 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 820 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 821 /* 822 Everyone has to call to draw the matrix since the graphics waits are 823 synchronized across all processors that share the PetscDraw object 824 */ 825 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 826 if (!rank) { 827 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 828 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 829 } 830 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 831 ierr = MatDestroy(A);CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatView_MPIAIJ" 838 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 839 { 840 int ierr; 841 PetscTruth isascii,isdraw,issocket,isbinary; 842 843 PetscFunctionBegin; 844 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 845 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 846 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 847 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 848 if (isascii || isdraw || isbinary || issocket) { 849 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 850 } else { 851 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 852 } 853 PetscFunctionReturn(0); 854 } 855 856 857 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatRelax_MPIAIJ" 860 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 861 { 862 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 863 int ierr; 864 Vec bb1; 865 PetscScalar mone=-1.0; 866 867 PetscFunctionBegin; 868 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 869 870 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 871 872 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 873 if (flag & SOR_ZERO_INITIAL_GUESS) { 874 
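      /*
         Sketch of the iteration performed below (assuming the usual block
         splitting A = [A_d | A_o] on each process, where A_d = mat->A and
         A_o = mat->B):

             lvec <- off-process part of x              (VecScatter)
             bb1  <- bb - A_o * lvec                    (multadd with negated lvec)
             x    <- SOR sweep of A_d applied to bb1    (local relax)

         i.e. only the diagonal block is relaxed; coupling to other
         processes enters through the updated right-hand side.
      */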
ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 875 its--; 876 } 877 878 while (its--) { 879 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 880 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 881 882 /* update rhs: bb1 = bb - B*x */ 883 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 884 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 885 886 /* local sweep */ 887 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 888 CHKERRQ(ierr); 889 } 890 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 891 if (flag & SOR_ZERO_INITIAL_GUESS) { 892 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 893 its--; 894 } 895 while (its--) { 896 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 897 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 898 899 /* update rhs: bb1 = bb - B*x */ 900 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 901 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 902 903 /* local sweep */ 904 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 905 CHKERRQ(ierr); 906 } 907 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 908 if (flag & SOR_ZERO_INITIAL_GUESS) { 909 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 910 its--; 911 } 912 while (its--) { 913 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 914 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 915 916 /* update rhs: bb1 = bb - B*x */ 917 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 918 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 919 920 /* local sweep */ 921 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 922 CHKERRQ(ierr); 923 } 924 } else { 925 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 926 } 927 928 ierr = VecDestroy(bb1);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } 931 932 #undef __FUNCT__ 933 #define __FUNCT__ "MatGetInfo_MPIAIJ" 934 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 935 { 936 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 937 Mat A = mat->A,B = mat->B; 938 int ierr; 939 PetscReal isend[5],irecv[5]; 940 941 PetscFunctionBegin; 942 info->block_size = 1.0; 943 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 944 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 945 isend[3] = info->memory; isend[4] = info->mallocs; 946 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 947 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 948 isend[3] += info->memory; isend[4] += info->mallocs; 949 if (flag == MAT_LOCAL) { 950 info->nz_used = isend[0]; 951 info->nz_allocated = isend[1]; 952 info->nz_unneeded = isend[2]; 953 info->memory = isend[3]; 954 info->mallocs = isend[4]; 955 } else if (flag == MAT_GLOBAL_MAX) { 956 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 957 info->nz_used = irecv[0]; 958 info->nz_allocated = irecv[1]; 959 info->nz_unneeded = irecv[2]; 960 info->memory = irecv[3]; 961 info->mallocs = irecv[4]; 962 } else if (flag == MAT_GLOBAL_SUM) { 963 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 964 info->nz_used = irecv[0]; 965 info->nz_allocated = irecv[1]; 966 info->nz_unneeded = irecv[2]; 967 info->memory = irecv[3]; 968 info->mallocs = irecv[4]; 969 } 970 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 971 info->fill_ratio_needed = 0; 972 info->factor_mallocs = 0; 973 info->rows_global = (double)matin->M; 974 info->columns_global = (double)matin->N; 975 info->rows_local = (double)matin->m; 976 info->columns_local = (double)matin->N; 977 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatSetOption_MPIAIJ" 983 int MatSetOption_MPIAIJ(Mat A,MatOption op) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 int ierr; 987 988 PetscFunctionBegin; 989 switch (op) { 990 case MAT_NO_NEW_NONZERO_LOCATIONS: 991 case MAT_YES_NEW_NONZERO_LOCATIONS: 992 case MAT_COLUMNS_UNSORTED: 993 case MAT_COLUMNS_SORTED: 994 case MAT_NEW_NONZERO_ALLOCATION_ERR: 995 case MAT_KEEP_ZEROED_ROWS: 996 case MAT_NEW_NONZERO_LOCATION_ERR: 997 case MAT_USE_INODES: 998 case MAT_DO_NOT_USE_INODES: 999 case MAT_IGNORE_ZERO_ENTRIES: 1000 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1001 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1002 break; 1003 case MAT_ROW_ORIENTED: 1004 a->roworiented = PETSC_TRUE; 1005 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1006 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1007 break; 1008 case MAT_ROWS_SORTED: 1009 case MAT_ROWS_UNSORTED: 1010 case MAT_YES_NEW_DIAGONALS: 1011 case MAT_USE_SINGLE_PRECISION_SOLVES: 1012 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1013 break; 1014 case MAT_COLUMN_ORIENTED: 1015 a->roworiented = PETSC_FALSE; 1016 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1017 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1018 break; 1019 case MAT_IGNORE_OFF_PROC_ENTRIES: 1020 a->donotstash = PETSC_TRUE; 1021 break; 1022 case MAT_NO_NEW_DIAGONALS: 1023 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1024 default: 1025 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1026 } 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatGetRow_MPIAIJ" 1032 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1033 { 1034 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1035 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1036 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1037 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1038 int *cmap,*idx_p; 1039 1040 PetscFunctionBegin; 1041 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1042 mat->getrowactive = PETSC_TRUE; 1043 1044 if (!mat->rowvalues && (idx || v)) { 1045 /* 1046 allocate enough space to hold information from the longest row. 
1047 */ 1048 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1049 int max = 1,tmp; 1050 for (i=0; i<matin->m; i++) { 1051 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1052 if (max < tmp) { max = tmp; } 1053 } 1054 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1055 mat->rowindices = (int*)(mat->rowvalues + max); 1056 } 1057 1058 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1059 lrow = row - rstart; 1060 1061 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1062 if (!v) {pvA = 0; pvB = 0;} 1063 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1064 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1065 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1066 nztot = nzA + nzB; 1067 1068 cmap = mat->garray; 1069 if (v || idx) { 1070 if (nztot) { 1071 /* Sort by increasing column numbers, assuming A and B already sorted */ 1072 int imark = -1; 1073 if (v) { 1074 *v = v_p = mat->rowvalues; 1075 for (i=0; i<nzB; i++) { 1076 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1077 else break; 1078 } 1079 imark = i; 1080 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1081 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1082 } 1083 if (idx) { 1084 *idx = idx_p = mat->rowindices; 1085 if (imark > -1) { 1086 for (i=0; i<imark; i++) { 1087 idx_p[i] = cmap[cworkB[i]]; 1088 } 1089 } else { 1090 for (i=0; i<nzB; i++) { 1091 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1092 else break; 1093 } 1094 imark = i; 1095 } 1096 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1097 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1098 } 1099 } else { 1100 if (idx) *idx = 0; 1101 if (v) *v = 0; 1102 } 1103 } 1104 *nz = nztot; 1105 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1106 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1112 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1113 { 1114 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1115 1116 PetscFunctionBegin; 1117 if (aij->getrowactive == PETSC_FALSE) { 1118 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1119 } 1120 aij->getrowactive = PETSC_FALSE; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatNorm_MPIAIJ" 1126 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1127 { 1128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1129 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1130 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1131 PetscReal sum = 0.0; 1132 PetscScalar *v; 1133 1134 PetscFunctionBegin; 1135 if (aij->size == 1) { 1136 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1137 } else { 1138 if (type == NORM_FROBENIUS) { 1139 v = amat->a; 1140 for (i=0; i<amat->nz; i++) { 1141 #if defined(PETSC_USE_COMPLEX) 1142 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1143 #else 1144 sum += (*v)*(*v); v++; 1145 #endif 1146 } 1147 v = bmat->a; 1148 for (i=0; i<bmat->nz; i++) { 1149 #if defined(PETSC_USE_COMPLEX) 1150 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1151 #else 1152 sum += (*v)*(*v); v++; 1153 #endif 1154 } 1155 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1156 *norm = sqrt(*norm); 1157 } else if (type == NORM_1) { /* max column norm */ 1158 
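      /*
         Sketch of the NORM_1 computation below: accumulate the absolute
         column sums of the local A and B blocks into a work array of
         global length N (using garray to map B's compressed column indices
         back to global columns), sum the arrays across processes with
         MPI_Allreduce, and take the maximum entry:

             ||A||_1 = max_j sum_i |a_ij|
      */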
PetscReal *tmp,*tmp2; 1159 int *jj,*garray = aij->garray; 1160 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1161 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1162 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1163 *norm = 0.0; 1164 v = amat->a; jj = amat->j; 1165 for (j=0; j<amat->nz; j++) { 1166 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1167 } 1168 v = bmat->a; jj = bmat->j; 1169 for (j=0; j<bmat->nz; j++) { 1170 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1171 } 1172 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1173 for (j=0; j<mat->N; j++) { 1174 if (tmp2[j] > *norm) *norm = tmp2[j]; 1175 } 1176 ierr = PetscFree(tmp);CHKERRQ(ierr); 1177 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1178 } else if (type == NORM_INFINITY) { /* max row norm */ 1179 PetscReal ntemp = 0.0; 1180 for (j=0; j<aij->A->m; j++) { 1181 v = amat->a + amat->i[j] + shift; 1182 sum = 0.0; 1183 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1184 sum += PetscAbsScalar(*v); v++; 1185 } 1186 v = bmat->a + bmat->i[j] + shift; 1187 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1188 sum += PetscAbsScalar(*v); v++; 1189 } 1190 if (sum > ntemp) ntemp = sum; 1191 } 1192 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1193 } else { 1194 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1195 } 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatTranspose_MPIAIJ" 1202 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1203 { 1204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1205 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1206 int ierr,shift = Aloc->indexshift; 1207 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1208 Mat B; 1209 PetscScalar *array; 1210 1211 PetscFunctionBegin; 1212 if (!matout && M != N) { 1213 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1214 } 1215 1216 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1217 1218 /* copy over the A part */ 1219 Aloc = (Mat_SeqAIJ*)a->A->data; 1220 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1221 row = a->rstart; 1222 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1223 for (i=0; i<m; i++) { 1224 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1225 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1226 } 1227 aj = Aloc->j; 1228 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1229 1230 /* copy over the B part */ 1231 Aloc = (Mat_SeqAIJ*)a->B->data; 1232 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1233 row = a->rstart; 1234 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1235 ct = cols; 1236 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1237 for (i=0; i<m; i++) { 1238 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1239 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1240 } 1241 ierr = PetscFree(ct);CHKERRQ(ierr); 1242 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1243 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1244 if (matout) { 1245 *matout = B; 1246 } else { 1247 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1254 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1255 { 1256 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1257 Mat a = aij->A,b 
= aij->B; 1258 int ierr,s1,s2,s3; 1259 1260 PetscFunctionBegin; 1261 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1262 if (rr) { 1263 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1264 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1265 /* Overlap communication with computation. */ 1266 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1267 } 1268 if (ll) { 1269 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1270 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1271 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1272 } 1273 /* scale the diagonal block */ 1274 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1275 1276 if (rr) { 1277 /* Do a scatter end and then right scale the off-diagonal block */ 1278 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1279 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1280 } 1281 1282 PetscFunctionReturn(0); 1283 } 1284 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1288 int MatPrintHelp_MPIAIJ(Mat A) 1289 { 1290 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1291 int ierr; 1292 1293 PetscFunctionBegin; 1294 if (!a->rank) { 1295 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1296 } 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1302 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1303 { 1304 PetscFunctionBegin; 1305 *bs = 1; 1306 PetscFunctionReturn(0); 1307 } 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1310 int MatSetUnfactored_MPIAIJ(Mat A) 1311 { 1312 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1313 int ierr; 1314 1315 PetscFunctionBegin; 1316 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatEqual_MPIAIJ" 1322 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1323 { 1324 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1325 Mat a,b,c,d; 1326 PetscTruth flg; 1327 int ierr; 1328 1329 PetscFunctionBegin; 1330 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1331 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1332 a = matA->A; b = matA->B; 1333 c = matB->A; d = matB->B; 1334 1335 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1336 if (flg == PETSC_TRUE) { 1337 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1338 } 1339 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatCopy_MPIAIJ" 1345 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1346 { 1347 int ierr; 1348 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1349 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1350 PetscTruth flg; 1351 1352 PetscFunctionBegin; 1353 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1354 if (str != SAME_NONZERO_PATTERN || !flg) { 1355 /* because of the column compression in the off-processor part of the matrix a->B, 1356 the number of columns in a->B and b->B may be different, hence we cannot call 1357 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1358 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1359 then copying the submatrices */ 1360 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1361 } else { 1362 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1363 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1370 int MatSetUpPreallocation_MPIAIJ(Mat A) 1371 { 1372 int ierr; 1373 1374 PetscFunctionBegin; 1375 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1380 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1381 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1382 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1383 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1384 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1385 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1386 #endif 1387 1388 #include "petscblaslapack.h" 1389 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatAXPY_MPIAIJ" 1392 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1393 { 1394 int ierr,one; 1395 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1396 Mat_SeqAIJ *x,*y; 1397 1398 PetscFunctionBegin; 1399 if (str == SAME_NONZERO_PATTERN) { 1400 x = (Mat_SeqAIJ *)xx->A->data; 1401 y = (Mat_SeqAIJ *)yy->A->data; 1402 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1403 x = (Mat_SeqAIJ *)xx->B->data; 1404 y = (Mat_SeqAIJ *)yy->B->data; 1405 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1406 } else { 1407 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1408 } 1409 PetscFunctionReturn(0); 1410 } 1411 1412 /* -------------------------------------------------------------------*/ 1413 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1414 MatGetRow_MPIAIJ, 1415 MatRestoreRow_MPIAIJ, 1416 MatMult_MPIAIJ, 1417 MatMultAdd_MPIAIJ, 1418 MatMultTranspose_MPIAIJ, 1419 MatMultTransposeAdd_MPIAIJ, 1420 0, 1421 0, 1422 0, 1423 0, 1424 0, 1425 0, 1426 MatRelax_MPIAIJ, 1427 MatTranspose_MPIAIJ, 1428 MatGetInfo_MPIAIJ, 1429 MatEqual_MPIAIJ, 1430 MatGetDiagonal_MPIAIJ, 1431 MatDiagonalScale_MPIAIJ, 1432 MatNorm_MPIAIJ, 1433 MatAssemblyBegin_MPIAIJ, 1434 MatAssemblyEnd_MPIAIJ, 1435 0, 1436 MatSetOption_MPIAIJ, 1437 MatZeroEntries_MPIAIJ, 1438 MatZeroRows_MPIAIJ, 1439 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1440 MatLUFactorSymbolic_MPIAIJ_TFS, 1441 #else 1442 0, 1443 #endif 1444 0, 1445 0, 1446 0, 1447 MatSetUpPreallocation_MPIAIJ, 1448 0, 1449 0, 1450 0, 1451 0, 1452 MatDuplicate_MPIAIJ, 1453 0, 1454 0, 1455 0, 1456 0, 1457 MatAXPY_MPIAIJ, 1458 MatGetSubMatrices_MPIAIJ, 1459 MatIncreaseOverlap_MPIAIJ, 1460 MatGetValues_MPIAIJ, 1461 MatCopy_MPIAIJ, 1462 MatPrintHelp_MPIAIJ, 1463 MatScale_MPIAIJ, 1464 0, 1465 0, 1466 0, 1467 MatGetBlockSize_MPIAIJ, 1468 0, 1469 0, 1470 0, 1471 0, 1472 MatFDColoringCreate_MPIAIJ, 1473 0, 1474 MatSetUnfactored_MPIAIJ, 1475 0, 1476 0, 1477 MatGetSubMatrix_MPIAIJ, 1478 MatDestroy_MPIAIJ, 1479 MatView_MPIAIJ, 1480 MatGetPetscMaps_Petsc, 1481 0, 1482 0, 1483 0, 1484 0, 1485 0, 1486 0, 1487 0, 1488 0, 1489 MatSetColoring_MPIAIJ, 1490 MatSetValuesAdic_MPIAIJ, 1491 MatSetValuesAdifor_MPIAIJ 1492 }; 1493 1494 /* 
----------------------------------------------------------------------------------------*/ 1495 1496 EXTERN_C_BEGIN 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1499 int MatStoreValues_MPIAIJ(Mat mat) 1500 { 1501 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1502 int ierr; 1503 1504 PetscFunctionBegin; 1505 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1506 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1507 PetscFunctionReturn(0); 1508 } 1509 EXTERN_C_END 1510 1511 EXTERN_C_BEGIN 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1514 int MatRetrieveValues_MPIAIJ(Mat mat) 1515 { 1516 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1517 int ierr; 1518 1519 PetscFunctionBegin; 1520 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1521 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1522 PetscFunctionReturn(0); 1523 } 1524 EXTERN_C_END 1525 1526 #include "petscpc.h" 1527 EXTERN_C_BEGIN 1528 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1529 EXTERN_C_END 1530 1531 EXTERN_C_BEGIN 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatCreate_MPIAIJ" 1534 int MatCreate_MPIAIJ(Mat B) 1535 { 1536 Mat_MPIAIJ *b; 1537 int ierr,i,size; 1538 #if defined(PETSC_HAVE_SUPERLUDIST) 1539 PetscTruth flg; 1540 #endif 1541 1542 1543 PetscFunctionBegin; 1544 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1545 1546 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1547 B->data = (void*)b; 1548 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1549 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1550 B->factor = 0; 1551 B->assembled = PETSC_FALSE; 1552 B->mapping = 0; 1553 1554 B->insertmode = NOT_SET_VALUES; 1555 b->size = size; 1556 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1557 1558 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1559 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1560 1561 /* the information in the maps duplicates the information computed below, eventually 1562 we should remove the duplicate information that is not contained in the maps */ 1563 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1564 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1565 1566 /* build local table of row and column ownerships */ 1567 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1568 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1569 b->cowners = b->rowners + b->size + 2; 1570 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1571 b->rowners[0] = 0; 1572 for (i=2; i<=b->size; i++) { 1573 b->rowners[i] += b->rowners[i-1]; 1574 } 1575 b->rstart = b->rowners[b->rank]; 1576 b->rend = b->rowners[b->rank+1]; 1577 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1578 b->cowners[0] = 0; 1579 for (i=2; i<=b->size; i++) { 1580 b->cowners[i] += b->cowners[i-1]; 1581 } 1582 b->cstart = b->cowners[b->rank]; 1583 b->cend = b->cowners[b->rank+1]; 1584 1585 /* build cache for off array entries formed */ 1586 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1587 b->donotstash = PETSC_FALSE; 1588 b->colmap = 0; 1589 b->garray = 0; 1590 b->roworiented = PETSC_TRUE; 1591 1592 /* stuff used for matrix vector multiply */ 1593 b->lvec = PETSC_NULL; 1594 b->Mvctx = PETSC_NULL; 1595 1596 /* stuff for MatGetRow() */ 1597 b->rowindices = 0; 1598 b->rowvalues = 0; 1599 b->getrowactive = PETSC_FALSE; 1600 /* 1601 #if 
defined(PETSC_HAVE_SUPERLUDIST) 1602 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr); 1603 if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); } 1604 #endif 1605 */ 1606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1607 "MatStoreValues_MPIAIJ", 1608 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1610 "MatRetrieveValues_MPIAIJ", 1611 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1613 "MatGetDiagonalBlock_MPIAIJ", 1614 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1615 1616 PetscFunctionReturn(0); 1617 } 1618 EXTERN_C_END 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1622 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1623 { 1624 Mat mat; 1625 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1626 int ierr; 1627 1628 PetscFunctionBegin; 1629 *newmat = 0; 1630 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1631 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1632 a = (Mat_MPIAIJ*)mat->data; 1633 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1634 mat->factor = matin->factor; 1635 mat->assembled = PETSC_TRUE; 1636 mat->insertmode = NOT_SET_VALUES; 1637 mat->preallocated = PETSC_TRUE; 1638 1639 a->rstart = oldmat->rstart; 1640 a->rend = oldmat->rend; 1641 a->cstart = oldmat->cstart; 1642 a->cend = oldmat->cend; 1643 a->size = oldmat->size; 1644 a->rank = oldmat->rank; 1645 a->donotstash = oldmat->donotstash; 1646 a->roworiented = oldmat->roworiented; 1647 a->rowindices = 0; 1648 a->rowvalues = 0; 1649 a->getrowactive = PETSC_FALSE; 1650 1651 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1652 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1653 if (oldmat->colmap) { 1654 #if defined (PETSC_USE_CTABLE) 1655 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1656 #else 1657 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1658 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1659 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1660 #endif 1661 } else a->colmap = 0; 1662 if (oldmat->garray) { 1663 int len; 1664 len = oldmat->B->n; 1665 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1666 PetscLogObjectMemory(mat,len*sizeof(int)); 1667 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1668 } else a->garray = 0; 1669 1670 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1671 PetscLogObjectParent(mat,a->lvec); 1672 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1673 PetscLogObjectParent(mat,a->Mvctx); 1674 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1675 PetscLogObjectParent(mat,a->A); 1676 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1677 PetscLogObjectParent(mat,a->B); 1678 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1679 *newmat = mat; 1680 PetscFunctionReturn(0); 1681 } 1682 1683 #include "petscsys.h" 1684 1685 EXTERN_C_BEGIN 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatLoad_MPIAIJ" 1688 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1689 { 1690 Mat A; 1691 PetscScalar *vals,*svals; 1692 MPI_Comm comm = ((PetscObject)viewer)->comm; 1693 MPI_Status status; 1694 
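  /*
     Outline of the load algorithm below (descriptive note only): process 0
     reads the binary header and the row-length array, scatters the row
     lengths, and then reads and ships each process's column indices and
     numerical values with MPI_Send; every process preallocates its part with
     MatCreateMPIAIJ() using the on/off-diagonal counts computed from its row
     lengths.  Rows are dealt out almost evenly:
         m = M/size + ((M % size) > rank).
  */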
int i,nz,ierr,j,rstart,rend,fd; 1695 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1696 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1697 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1698 1699 PetscFunctionBegin; 1700 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1701 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1702 if (!rank) { 1703 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1704 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1705 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1706 if (header[3] < 0) { 1707 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1708 } 1709 } 1710 1711 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1712 M = header[1]; N = header[2]; 1713 /* determine ownership of all rows */ 1714 m = M/size + ((M % size) > rank); 1715 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1716 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1717 rowners[0] = 0; 1718 for (i=2; i<=size; i++) { 1719 rowners[i] += rowners[i-1]; 1720 } 1721 rstart = rowners[rank]; 1722 rend = rowners[rank+1]; 1723 1724 /* distribute row lengths to all processors */ 1725 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1726 offlens = ourlens + (rend-rstart); 1727 if (!rank) { 1728 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1729 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1730 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1731 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1732 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1733 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1734 } else { 1735 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1736 } 1737 1738 if (!rank) { 1739 /* calculate the number of nonzeros on each processor */ 1740 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1741 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1742 for (i=0; i<size; i++) { 1743 for (j=rowners[i]; j< rowners[i+1]; j++) { 1744 procsnz[i] += rowlengths[j]; 1745 } 1746 } 1747 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1748 1749 /* determine max buffer needed and allocate it */ 1750 maxnz = 0; 1751 for (i=0; i<size; i++) { 1752 maxnz = PetscMax(maxnz,procsnz[i]); 1753 } 1754 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1755 1756 /* read in my part of the matrix column indices */ 1757 nz = procsnz[0]; 1758 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1759 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1760 1761 /* read in every one elses and ship off */ 1762 for (i=1; i<size; i++) { 1763 nz = procsnz[i]; 1764 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1765 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1766 } 1767 ierr = PetscFree(cols);CHKERRQ(ierr); 1768 } else { 1769 /* determine buffer space needed for message */ 1770 nz = 0; 1771 for (i=0; i<m; i++) { 1772 nz += ourlens[i]; 1773 } 1774 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1775 1776 /* receive message of column indices*/ 1777 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1778 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1779 if (maxnz != nz) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1780 } 1781 1782 /* determine column ownership if matrix is not square */ 1783 if (N != M) { 1784 n = N/size + ((N % size) > rank); 1785 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1786 cstart = cend - n; 1787 } else { 1788 cstart = rstart; 1789 cend = rend; 1790 n = cend - cstart; 1791 } 1792 1793 /* loop over local rows, determining number of off diagonal entries */ 1794 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1795 jj = 0; 1796 for (i=0; i<m; i++) { 1797 for (j=0; j<ourlens[i]; j++) { 1798 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1799 jj++; 1800 } 1801 } 1802 1803 /* create our matrix */ 1804 for (i=0; i<m; i++) { 1805 ourlens[i] -= offlens[i]; 1806 } 1807 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1808 A = *newmat; 1809 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1810 for (i=0; i<m; i++) { 1811 ourlens[i] += offlens[i]; 1812 } 1813 1814 if (!rank) { 1815 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1816 1817 /* read in my part of the matrix numerical values */ 1818 nz = procsnz[0]; 1819 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1820 1821 /* insert into matrix */ 1822 jj = rstart; 1823 smycols = mycols; 1824 svals = vals; 1825 for (i=0; i<m; i++) { 1826 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1827 smycols += ourlens[i]; 1828 svals += ourlens[i]; 1829 jj++; 1830 } 1831 1832 /* read in other processors and ship out */ 1833 for (i=1; i<size; i++) { 1834 nz = procsnz[i]; 1835 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1836 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1837 } 1838 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1839 } else { 1840 /* receive numeric values */ 1841 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1842 1843 /* receive message of values*/ 1844 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1845 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1846 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1847 1848 /* insert into matrix */ 1849 jj = rstart; 1850 smycols = mycols; 1851 svals = vals; 1852 for (i=0; i<m; i++) { 1853 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1854 smycols += ourlens[i]; 1855 svals += ourlens[i]; 1856 jj++; 1857 } 1858 } 1859 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1860 ierr = PetscFree(vals);CHKERRQ(ierr); 1861 ierr = PetscFree(mycols);CHKERRQ(ierr); 1862 ierr = PetscFree(rowners);CHKERRQ(ierr); 1863 1864 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1865 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1866 PetscFunctionReturn(0); 1867 } 1868 EXTERN_C_END 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 1872 /* 1873 Not great since it makes two copies of the submatrix, first an SeqAIJ 1874 in local and then by concatenating the local matrices the end result. 
1875 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1876 */ 1877 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1878 { 1879 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1880 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1881 Mat *local,M,Mreuse; 1882 PetscScalar *vwork,*aa; 1883 MPI_Comm comm = mat->comm; 1884 Mat_SeqAIJ *aij; 1885 1886 1887 PetscFunctionBegin; 1888 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1889 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1890 1891 if (call == MAT_REUSE_MATRIX) { 1892 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1893 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1894 local = &Mreuse; 1895 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1896 } else { 1897 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1898 Mreuse = *local; 1899 ierr = PetscFree(local);CHKERRQ(ierr); 1900 } 1901 1902 /* 1903 m - number of local rows 1904 n - number of columns (same on all processors) 1905 rstart - first row in new global matrix generated 1906 */ 1907 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1908 if (call == MAT_INITIAL_MATRIX) { 1909 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1910 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1911 ii = aij->i; 1912 jj = aij->j; 1913 1914 /* 1915 Determine the number of non-zeros in the diagonal and off-diagonal 1916 portions of the matrix in order to do correct preallocation 1917 */ 1918 1919 /* first get start and end of "diagonal" columns */ 1920 if (csize == PETSC_DECIDE) { 1921 nlocal = n/size + ((n % size) > rank); 1922 } else { 1923 nlocal = csize; 1924 } 1925 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1926 rstart = rend - nlocal; 1927 if (rank == size - 1 && rend != n) { 1928 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1929 } 1930 1931 /* next, compute all the lengths */ 1932 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1933 olens = dlens + m; 1934 for (i=0; i<m; i++) { 1935 jend = ii[i+1] - ii[i]; 1936 olen = 0; 1937 dlen = 0; 1938 for (j=0; j<jend; j++) { 1939 if (*jj < rstart || *jj >= rend) olen++; 1940 else dlen++; 1941 jj++; 1942 } 1943 olens[i] = olen; 1944 dlens[i] = dlen; 1945 } 1946 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1947 ierr = PetscFree(dlens);CHKERRQ(ierr); 1948 } else { 1949 int ml,nl; 1950 1951 M = *newmat; 1952 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1953 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1954 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1955 /* 1956 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1957 rather than the slower MatSetValues(). 
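Marking the matrix as previously assembled, but not currently assembled,
       lets the insertion below reuse the matrix's existing parallel data structures.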
1958 */ 1959 M->was_assembled = PETSC_TRUE; 1960 M->assembled = PETSC_FALSE; 1961 } 1962 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1963 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1964 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1965 ii = aij->i; 1966 jj = aij->j; 1967 aa = aij->a; 1968 for (i=0; i<m; i++) { 1969 row = rstart + i; 1970 nz = ii[i+1] - ii[i]; 1971 cwork = jj; jj += nz; 1972 vwork = aa; aa += nz; 1973 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1974 } 1975 1976 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1977 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1978 *newmat = M; 1979 1980 /* save submatrix used in processor for next request */ 1981 if (call == MAT_INITIAL_MATRIX) { 1982 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1983 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1984 } 1985 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatMPIAIJSetPreallocation" 1991 /*@C 1992 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format 1993 (the default parallel PETSc format). For good matrix assembly performance 1994 the user should preallocate the matrix storage by setting the parameters 1995 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1996 performance can be increased by more than a factor of 50. 1997 1998 Collective on MPI_Comm 1999 2000 Input Parameters: 2001 + B - the matrix 2002 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2003 (same value is used for all local rows) 2004 . d_nnz - array containing the number of nonzeros in the various rows of the 2005 DIAGONAL portion of the local submatrix (possibly different for each row) 2006 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2007 The size of this array is equal to the number of local rows, i.e. 'm'. 2008 You must leave room for the diagonal entry even if it is zero. 2009 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2010 submatrix (same value is used for all local rows). 2011 - o_nnz - array containing the number of nonzeros in the various rows of the 2012 OFF-DIAGONAL portion of the local submatrix (possibly different for 2013 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2014 structure. The size of this array is equal to the number 2015 of local rows, i.e. 'm'. 2016 2017 The AIJ format (also called the Yale sparse matrix format or 2018 compressed row storage) is fully compatible with standard Fortran 77 2019 storage. That is, the stored row and column indices can begin at 2020 either one (as in Fortran) or zero. See the users manual for details. 2021 2022 The user MUST specify either the local or global matrix dimensions 2023 (possibly both). 2024 2025 The parallel matrix is partitioned such that the first m0 rows belong to 2026 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2027 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 2028 2029 The DIAGONAL portion of the local submatrix of a processor can be defined 2030 as the submatrix which is obtained by extracting the part corresponding 2031 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2032 first row that belongs to the processor, and r2 is the last row belonging 2033 to this processor. This is a square mxm matrix. The remaining portion 2034 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2035 2036 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2037 2038 By default, this format uses inodes (identical nodes) when possible. 2039 We search for consecutive rows with the same nonzero structure, thereby 2040 reusing matrix information to achieve increased efficiency. 2041 2042 Options Database Keys: 2043 + -mat_aij_no_inode - Do not use inodes 2044 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2045 - -mat_aij_oneindex - Internally use indexing starting at 1 2046 rather than 0. Note that when calling MatSetValues(), 2047 the user still MUST index entries starting at 0! 2048 2049 Example usage: 2050 2051 Consider the following 8x8 matrix with 34 non-zero values, that is 2052 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2053 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2054 as follows: 2055 2056 .vb 2057 1 2 0 | 0 3 0 | 0 4 2058 Proc0 0 5 6 | 7 0 0 | 8 0 2059 9 0 10 | 11 0 0 | 12 0 2060 ------------------------------------- 2061 13 0 14 | 15 16 17 | 0 0 2062 Proc1 0 18 0 | 19 20 21 | 0 0 2063 0 0 0 | 22 23 0 | 24 0 2064 ------------------------------------- 2065 Proc2 25 26 27 | 0 0 28 | 29 0 2066 30 0 0 | 31 32 33 | 0 34 2067 .ve 2068 2069 This can be represented as a collection of submatrices as: 2070 2071 .vb 2072 A B C 2073 D E F 2074 G H I 2075 .ve 2076 2077 where the submatrices A,B,C are owned by proc0, D,E,F are 2078 owned by proc1, and G,H,I are owned by proc2. 2079 2080 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2081 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2082 The 'M','N' parameters are 8,8, and have the same values on all procs. 2083 2084 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2085 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2086 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2087 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2088 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2089 matrix, and [DF] as another SeqAIJ matrix. 2090 2091 When d_nz, o_nz parameters are specified, d_nz storage elements are 2092 allocated for every row of the local diagonal submatrix, and o_nz 2093 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2094 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2095 local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 2096 In this case, the values of d_nz,o_nz are: 2097 .vb 2098 proc0 : d_nz = 2, o_nz = 2 2099 proc1 : d_nz = 3, o_nz = 2 2100 proc2 : d_nz = 1, o_nz = 4 2101 .ve 2102 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2103 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2104 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2105 34 values. 2106 2107 When d_nnz, o_nnz parameters are specified, the storage is specified 2108 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2109 In the above case the values for d_nnz,o_nnz are: 2110 .vb 2111 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2112 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2113 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2114 .ve 2115 Here the space allocated is the sum of all the above values, i.e. 34, and 2116 hence the preallocation is perfect.
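   As a rough usage sketch (illustrative only; each process would pass its own
   local sizes and arrays, and the declarations shown are just the minimal
   boilerplate assumed by any PETSc program), proc1 in the example above could
   set up its preallocation as:
.vb
      Mat A;
      int ierr,d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
      ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   d_nz and o_nz are ignored here because d_nnz and o_nnz are supplied.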
2117 2118 Level: intermediate 2119 2120 .keywords: matrix, aij, compressed row, sparse, parallel 2121 2122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2123 @*/ 2124 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2125 { 2126 Mat_MPIAIJ *b; 2127 int ierr,i; 2128 PetscTruth flg2; 2129 2130 PetscFunctionBegin; 2131 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2132 if (!flg2) PetscFunctionReturn(0); 2133 B->preallocated = PETSC_TRUE; 2134 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2135 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2136 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2137 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2138 if (d_nnz) { 2139 for (i=0; i<B->m; i++) { 2140 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2141 } 2142 } 2143 if (o_nnz) { 2144 for (i=0; i<B->m; i++) { 2145 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2146 } 2147 } 2148 b = (Mat_MPIAIJ*)B->data; 2149 2150 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2151 PetscLogObjectParent(B,b->A); 2152 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2153 PetscLogObjectParent(B,b->B); 2154 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatCreateMPIAIJ" 2160 /*@C 2161 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2162 (the default parallel PETSc format). For good matrix assembly performance 2163 the user should preallocate the matrix storage by setting the parameters 2164 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2165 performance can be increased by more than a factor of 50. 2166 2167 Collective on MPI_Comm 2168 2169 Input Parameters: 2170 + comm - MPI communicator 2171 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2172 This value should be the same as the local size used in creating the 2173 y vector for the matrix-vector product y = Ax. 2174 . n - This value should be the same as the local size used in creating the 2175 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2176 calculated if N is given) For square matrices n is almost always m. 2177 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2178 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2179 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2180 (same value is used for all local rows) 2181 . d_nnz - array containing the number of nonzeros in the various rows of the 2182 DIAGONAL portion of the local submatrix (possibly different for each row) 2183 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2184 The size of this array is equal to the number of local rows, i.e 'm'. 2185 You must leave room for the diagonal entry even if it is zero. 2186 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2187 submatrix (same value is used for all local rows). 2188 - o_nnz - array containing the number of nonzeros in the various rows of the 2189 OFF-DIAGONAL portion of the local submatrix (possibly different for 2190 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2191 structure. 
The size of this array is equal to the number 2192 of local rows, i.e. 'm'. 2193 2194 Output Parameter: 2195 . A - the matrix 2196 2197 Notes: 2198 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2199 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2200 storage requirements for this matrix. 2201 2202 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2203 processor then it must be used on all processors that share the object for 2204 that argument. 2205 2206 The AIJ format (also called the Yale sparse matrix format or 2207 compressed row storage) is fully compatible with standard Fortran 77 2208 storage. That is, the stored row and column indices can begin at 2209 either one (as in Fortran) or zero. See the users manual for details. 2210 2211 The user MUST specify either the local or global matrix dimensions 2212 (possibly both). 2213 2214 The parallel matrix is partitioned such that the first m0 rows belong to 2215 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2216 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 2217 2218 The DIAGONAL portion of the local submatrix of a processor can be defined 2219 as the submatrix which is obtained by extracting the part corresponding 2220 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2221 first row that belongs to the processor, and r2 is the last row belonging 2222 to this processor. This is a square mxm matrix. The remaining portion 2223 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2224 2225 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2226 2227 By default, this format uses inodes (identical nodes) when possible. 2228 We search for consecutive rows with the same nonzero structure, thereby 2229 reusing matrix information to achieve increased efficiency. 2230 2231 Options Database Keys: 2232 + -mat_aij_no_inode - Do not use inodes 2233 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2234 - -mat_aij_oneindex - Internally use indexing starting at 1 2235 rather than 0. Note that when calling MatSetValues(), 2236 the user still MUST index entries starting at 0! 2237 2238 2239 Example usage: 2240 2241 Consider the following 8x8 matrix with 34 non-zero values, that is 2242 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2243 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2244 as follows: 2245 2246 .vb 2247 1 2 0 | 0 3 0 | 0 4 2248 Proc0 0 5 6 | 7 0 0 | 8 0 2249 9 0 10 | 11 0 0 | 12 0 2250 ------------------------------------- 2251 13 0 14 | 15 16 17 | 0 0 2252 Proc1 0 18 0 | 19 20 21 | 0 0 2253 0 0 0 | 22 23 0 | 24 0 2254 ------------------------------------- 2255 Proc2 25 26 27 | 0 0 28 | 29 0 2256 30 0 0 | 31 32 33 | 0 34 2257 .ve 2258 2259 This can be represented as a collection of submatrices as: 2260 2261 .vb 2262 A B C 2263 D E F 2264 G H I 2265 .ve 2266 2267 where the submatrices A,B,C are owned by proc0, D,E,F are 2268 owned by proc1, and G,H,I are owned by proc2. 2269 2270 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2271 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2272 The 'M','N' parameters are 8,8, and have the same values on all procs. 2273 2274 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2275 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2276 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2277 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2278 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2279 matrix, and [DF] as another SeqAIJ matrix. 2280 2281 When d_nz, o_nz parameters are specified, d_nz storage elements are 2282 allocated for every row of the local diagonal submatrix, and o_nz 2283 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2284 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2285 local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 2286 In this case, the values of d_nz,o_nz are: 2287 .vb 2288 proc0 : d_nz = 2, o_nz = 2 2289 proc1 : d_nz = 3, o_nz = 2 2290 proc2 : d_nz = 1, o_nz = 4 2291 .ve 2292 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2293 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2294 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2295 34 values. 2296 2297 When d_nnz, o_nnz parameters are specified, the storage is specified 2298 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2299 In the above case the values for d_nnz,o_nnz are: 2300 .vb 2301 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2302 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2303 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2304 .ve 2305 Here the space allocated is the sum of all the above values, i.e. 34, and 2306 hence the preallocation is perfect. 2307 2308 Level: intermediate 2309 2310 .keywords: matrix, aij, compressed row, sparse, parallel 2311 2312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2313 @*/ 2314 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2315 { 2316 int ierr,size; 2317 2318 PetscFunctionBegin; 2319 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2320 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2321 if (size > 1) { 2322 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2323 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2324 } else { 2325 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2326 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2327 } 2328 PetscFunctionReturn(0); 2329 } 2330 2331 #undef __FUNCT__ 2332 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2333 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2334 { 2335 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2336 PetscFunctionBegin; 2337 *Ad = a->A; 2338 *Ao = a->B; 2339 *colmap = a->garray; 2340 PetscFunctionReturn(0); 2341 } 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2345 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2346 { 2347 int ierr; 2348 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2349 2350 PetscFunctionBegin; 2351 if (coloring->ctype == IS_COLORING_LOCAL) { 2352 int *allcolors,*colors,i; 2353 ISColoring ocoloring; 2354 2355 /* set coloring for diagonal portion */ 2356 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2357 2358 /* set coloring for off-diagonal portion */ 2359 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2360 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2361 for (i=0; i<a->B->n; i++) { 2362 colors[i] = allcolors[a->garray[i]]; 2363 } 2364 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2365 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2366 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2367 ierr =
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2368 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2369 int *colors,i,*larray; 2370 ISColoring ocoloring; 2371 2372 /* set coloring for diagonal portion */ 2373 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2374 for (i=0; i<a->A->n; i++) { 2375 larray[i] = i + a->cstart; 2376 } 2377 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2378 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2379 for (i=0; i<a->A->n; i++) { 2380 colors[i] = coloring->colors[larray[i]]; 2381 } 2382 ierr = PetscFree(larray);CHKERRQ(ierr); 2383 ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2384 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2385 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2386 2387 /* set coloring for off-diagonal portion */ 2388 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2389 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2390 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2391 for (i=0; i<a->B->n; i++) { 2392 colors[i] = coloring->colors[larray[i]]; 2393 } 2394 ierr = PetscFree(larray);CHKERRQ(ierr); 2395 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2396 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2397 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2398 } else { 2399 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2400 } 2401 2402 PetscFunctionReturn(0); 2403 } 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2407 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2408 { 2409 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2410 int ierr; 2411 2412 PetscFunctionBegin; 2413 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2414 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2420 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2421 { 2422 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2423 int ierr; 2424 2425 PetscFunctionBegin; 2426 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2427 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2428 PetscFunctionReturn(0); 2429 } 2430
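/*
   Typical caller-side use of the routines in this file, as an illustrative sketch
   only: the function name and the tridiagonal stencil are hypothetical and not part
   of PETSc, and the block is excluded from compilation.
*/
#if 0
static int BuildExampleMPIAIJ(MPI_Comm comm,Mat *A)
{
  int         ierr,rstart,rend,i,nc,cols[3];
  PetscScalar vals[3];

  PetscFunctionBegin;
  /* 100x100 global matrix; PETSc chooses the row split; roughly 3 nonzeros per
     row land in the diagonal block and 2 in the off-diagonal block */
  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,3,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    /* second-difference stencil, clipped at the global boundaries */
    nc = 0;
    if (i > 0)  {cols[nc] = i-1; vals[nc] = -1.0; nc++;}
    cols[nc] = i; vals[nc] = 2.0; nc++;
    if (i < 99) {cols[nc] = i+1; vals[nc] = -1.0; nc++;}
    ierr = MatSetValues(*A,1,&i,nc,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif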