1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if 
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 
290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. 
*/ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 #if defined(PETSC_HAVE_SUPERLUDIST) 413 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 414 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 415 #endif 416 417 #if defined(PETSC_HAVE_SPOOLES) 418 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 419 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 420 #endif 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 426 int MatZeroEntries_MPIAIJ(Mat A) 427 { 428 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 429 int ierr; 430 431 PetscFunctionBegin; 432 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 433 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 #undef __FUNCT__ 438 #define __FUNCT__ "MatZeroRows_MPIAIJ" 439 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 440 { 441 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 442 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 443 int *nprocs,j,idx,nsends,row; 444 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 445 int *rvalues,tag = A->tag,count,base,slen,n,*source; 446 int *lens,imdex,*lrows,*values,rstart=l->rstart; 447 MPI_Comm comm = A->comm; 448 MPI_Request *send_waits,*recv_waits; 449 MPI_Status recv_status,*send_status; 450 IS istmp; 451 PetscTruth found; 452 453 PetscFunctionBegin; 454 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 455 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 456 457 /* first count number of contributors to each processor */ 458 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 459 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 460 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 461 for (i=0; i<N; i++) { 462 idx = rows[i]; 463 found = PETSC_FALSE; 464 for (j=0; j<size; j++) { 465 if (idx >= owners[j] && idx < owners[j+1]) { 466 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 467 } 468 } 469 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 470 } 471 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 472 473 /* inform other processors of number of messages and max length*/ 474 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 475 476 /* post receives: */ 477 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 478 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 479 for (i=0; i<nrecvs; i++) { 480 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 481 } 482 483 /* do sends: 484 1) starts[i] gives the starting index in svalues for 
stuff going to 485 the ith processor 486 */ 487 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 488 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 489 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 490 starts[0] = 0; 491 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 492 for (i=0; i<N; i++) { 493 svalues[starts[owner[i]]++] = rows[i]; 494 } 495 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 496 497 starts[0] = 0; 498 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 499 count = 0; 500 for (i=0; i<size; i++) { 501 if (nprocs[2*i+1]) { 502 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 503 } 504 } 505 ierr = PetscFree(starts);CHKERRQ(ierr); 506 507 base = owners[rank]; 508 509 /* wait on receives */ 510 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 511 source = lens + nrecvs; 512 count = nrecvs; slen = 0; 513 while (count) { 514 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 515 /* unpack receives into our local space */ 516 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 517 source[imdex] = recv_status.MPI_SOURCE; 518 lens[imdex] = n; 519 slen += n; 520 count--; 521 } 522 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 523 524 /* move the data into the send scatter */ 525 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 526 count = 0; 527 for (i=0; i<nrecvs; i++) { 528 values = rvalues + i*nmax; 529 for (j=0; j<lens[i]; j++) { 530 lrows[count++] = values[j] - base; 531 } 532 } 533 ierr = PetscFree(rvalues);CHKERRQ(ierr); 534 ierr = PetscFree(lens);CHKERRQ(ierr); 535 ierr = PetscFree(owner);CHKERRQ(ierr); 536 ierr = PetscFree(nprocs);CHKERRQ(ierr); 537 538 /* actually zap the local rows */ 539 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 540 PetscLogObjectParent(A,istmp); 541 542 /* 543 Zero the required rows. If the "diagonal block" of the matrix 544 is square and the user wishes to set the diagonal we use seperate 545 code so that MatSetValues() is not called for each diagonal allocating 546 new memory, thus calling lots of mallocs and slowing things down. 
547 548 Contributed by: Mathew Knepley 549 */ 550 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 551 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 552 if (diag && (l->A->M == l->A->N)) { 553 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 554 } else if (diag) { 555 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 556 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 557 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 558 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 559 } 560 for (i = 0; i < slen; i++) { 561 row = lrows[i] + rstart; 562 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 563 } 564 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 565 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 } else { 567 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 568 } 569 ierr = ISDestroy(istmp);CHKERRQ(ierr); 570 ierr = PetscFree(lrows);CHKERRQ(ierr); 571 572 /* wait on sends */ 573 if (nsends) { 574 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 575 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 576 ierr = PetscFree(send_status);CHKERRQ(ierr); 577 } 578 ierr = PetscFree(send_waits);CHKERRQ(ierr); 579 ierr = PetscFree(svalues);CHKERRQ(ierr); 580 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatMult_MPIAIJ" 586 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 587 { 588 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 589 int ierr,nt; 590 591 PetscFunctionBegin; 592 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 593 if (nt != A->n) { 594 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 595 } 596 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 598 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatMultAdd_MPIAIJ" 605 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 606 { 607 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 608 int ierr; 609 610 PetscFunctionBegin; 611 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 612 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 613 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 614 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 #undef __FUNCT__ 619 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 620 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 621 { 622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 623 int ierr; 624 625 PetscFunctionBegin; 626 /* do nondiagonal part */ 627 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 628 /* send it on its way */ 629 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 /* do local part */ 631 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 632 /* receive remote parts: note this assumes the values are not actually */ 633 /* inserted in yy until the next line, which is true for my implementation*/ 634 /* but is not perhaps always true. 
*/ 635 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 641 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 642 { 643 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 644 int ierr; 645 646 PetscFunctionBegin; 647 /* do nondiagonal part */ 648 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 649 /* send it on its way */ 650 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 651 /* do local part */ 652 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 653 /* receive remote parts: note this assumes the values are not actually */ 654 /* inserted in yy until the next line, which is true for my implementation*/ 655 /* but is not perhaps always true. */ 656 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659 660 /* 661 This only works correctly for square matrices where the subblock A->A is the 662 diagonal block 663 */ 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 666 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 667 { 668 int ierr; 669 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 670 671 PetscFunctionBegin; 672 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 673 if (a->rstart != a->cstart || a->rend != a->cend) { 674 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 675 } 676 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatScale_MPIAIJ" 682 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 683 { 684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 685 int ierr; 686 687 PetscFunctionBegin; 688 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 689 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatDestroy_MPIAIJ" 695 int MatDestroy_MPIAIJ(Mat mat) 696 { 697 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 698 int ierr; 699 700 PetscFunctionBegin; 701 #if defined(PETSC_USE_LOG) 702 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 703 #endif 704 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 705 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 706 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 707 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 708 #if defined (PETSC_USE_CTABLE) 709 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 710 #else 711 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 712 #endif 713 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 714 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 715 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 716 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 717 ierr = PetscFree(aij);CHKERRQ(ierr); 718 PetscFunctionReturn(0); 719 } 720 721 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 722 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 723 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatView_MPIAIJ_Binary" 726 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 729 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 730 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 731 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 732 int 
nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 733 PetscScalar *column_values; 734 735 PetscFunctionBegin; 736 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 737 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 738 nz = A->nz + B->nz; 739 if (rank == 0) { 740 header[0] = MAT_FILE_COOKIE; 741 header[1] = mat->M; 742 header[2] = mat->N; 743 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 744 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 745 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 746 /* get largest number of rows any processor has */ 747 rlen = mat->m; 748 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 749 for (i=1; i<size; i++) { 750 rlen = PetscMax(rlen,range[i+1] - range[i]); 751 } 752 } else { 753 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 754 rlen = mat->m; 755 } 756 757 /* load up the local row counts */ 758 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 759 for (i=0; i<mat->m; i++) { 760 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 761 } 762 763 /* store the row lengths to the file */ 764 if (rank == 0) { 765 MPI_Status status; 766 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 767 for (i=1; i<size; i++) { 768 rlen = range[i+1] - range[i]; 769 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 770 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 771 } 772 } else { 773 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 774 } 775 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 776 777 /* load up the local column indices */ 778 nzmax = nz; /* )th processor needs space a largest processor needs */ 779 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 780 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 781 cnt = 0; 782 for (i=0; i<mat->m; i++) { 783 for (j=B->i[i]; j<B->i[i+1]; j++) { 784 if ( (col = garray[B->j[j]]) > cstart) break; 785 column_indices[cnt++] = col; 786 } 787 for (k=A->i[i]; k<A->i[i+1]; k++) { 788 column_indices[cnt++] = A->j[k] + cstart; 789 } 790 for (; j<B->i[i+1]; j++) { 791 column_indices[cnt++] = garray[B->j[j]]; 792 } 793 } 794 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 795 796 /* store the column indices to the file */ 797 if (rank == 0) { 798 MPI_Status status; 799 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 800 for (i=1; i<size; i++) { 801 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 802 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 803 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 804 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 805 } 806 } else { 807 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 808 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(column_indices);CHKERRQ(ierr); 811 812 /* load up the local column values */ 813 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 814 cnt = 0; 815 for (i=0; i<mat->m; i++) { 816 for (j=B->i[i]; j<B->i[i+1]; j++) { 817 if ( garray[B->j[j]] > cstart) break; 818 column_values[cnt++] = B->a[j]; 819 } 820 for (k=A->i[i]; k<A->i[i+1]; k++) { 821 
column_values[cnt++] = A->a[k]; 822 } 823 for (; j<B->i[i+1]; j++) { 824 column_values[cnt++] = B->a[j]; 825 } 826 } 827 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 828 829 /* store the column values to the file */ 830 if (rank == 0) { 831 MPI_Status status; 832 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 833 for (i=1; i<size; i++) { 834 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 835 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 836 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 837 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 838 } 839 } else { 840 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 841 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 842 } 843 ierr = PetscFree(column_values);CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 849 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 850 { 851 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 852 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 853 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 854 PetscTruth isdraw,isascii,flg,isbinary; 855 PetscViewer sviewer; 856 PetscViewerFormat format; 857 858 PetscFunctionBegin; 859 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 860 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 861 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 862 if (isascii) { 863 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 864 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 865 MatInfo info; 866 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 867 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 868 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 869 if (flg) { 870 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 871 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 872 } else { 873 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 874 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 875 } 876 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 877 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 878 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 879 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 880 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 881 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } else if (format == PETSC_VIEWER_ASCII_INFO) { 884 PetscFunctionReturn(0); 885 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 886 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 887 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 888 #endif 889 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 890 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 891 #endif 892 893 PetscFunctionReturn(0); 894 } 895 } else 
if (isbinary) { 896 if (size == 1) { 897 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 898 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 899 } else { 900 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 901 } 902 PetscFunctionReturn(0); 903 } else if (isdraw) { 904 PetscDraw draw; 905 PetscTruth isnull; 906 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 907 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 908 } 909 910 if (size == 1) { 911 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 912 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 913 } else { 914 /* assemble the entire matrix onto first processor. */ 915 Mat A; 916 Mat_SeqAIJ *Aloc; 917 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 918 PetscScalar *a; 919 920 if (!rank) { 921 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 922 } else { 923 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 924 } 925 PetscLogObjectParent(mat,A); 926 927 /* copy over the A part */ 928 Aloc = (Mat_SeqAIJ*)aij->A->data; 929 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 930 row = aij->rstart; 931 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 932 for (i=0; i<m; i++) { 933 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 934 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 935 } 936 aj = Aloc->j; 937 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 938 939 /* copy over the B part */ 940 Aloc = (Mat_SeqAIJ*)aij->B->data; 941 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 942 row = aij->rstart; 943 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 944 ct = cols; 945 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 946 for (i=0; i<m; i++) { 947 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 948 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 949 } 950 ierr = PetscFree(ct);CHKERRQ(ierr); 951 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 952 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 953 /* 954 Everyone has to call to draw the matrix since the graphics waits are 955 synchronized across all processors that share the PetscDraw object 956 */ 957 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 958 if (!rank) { 959 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 960 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 961 } 962 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 963 ierr = MatDestroy(A);CHKERRQ(ierr); 964 } 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatView_MPIAIJ" 970 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 971 { 972 int ierr; 973 PetscTruth isascii,isdraw,issocket,isbinary; 974 975 PetscFunctionBegin; 976 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 977 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 978 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 979 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 980 if (isascii || isdraw || isbinary || issocket) { 981 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 982 } else { 983 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ 
matrices",((PetscObject)viewer)->type_name); 984 } 985 PetscFunctionReturn(0); 986 } 987 988 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatRelax_MPIAIJ" 992 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 993 { 994 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 995 int ierr; 996 Vec bb1; 997 PetscScalar mone=-1.0; 998 999 PetscFunctionBegin; 1000 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1001 1002 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1003 1004 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1005 if (flag & SOR_ZERO_INITIAL_GUESS) { 1006 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1007 its--; 1008 } 1009 1010 while (its--) { 1011 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1012 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1013 1014 /* update rhs: bb1 = bb - B*x */ 1015 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1016 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1017 1018 /* local sweep */ 1019 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1020 CHKERRQ(ierr); 1021 } 1022 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1023 if (flag & SOR_ZERO_INITIAL_GUESS) { 1024 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1025 its--; 1026 } 1027 while (its--) { 1028 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1030 1031 /* update rhs: bb1 = bb - B*x */ 1032 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1033 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1034 1035 /* local sweep */ 1036 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1037 CHKERRQ(ierr); 1038 } 1039 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1040 if (flag & SOR_ZERO_INITIAL_GUESS) { 1041 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1042 its--; 1043 } 1044 while (its--) { 1045 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1046 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1047 1048 /* update rhs: bb1 = bb - B*x */ 1049 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1050 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1051 1052 /* local sweep */ 1053 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1054 CHKERRQ(ierr); 1055 } 1056 } else { 1057 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1058 } 1059 1060 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1066 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1067 { 1068 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1069 Mat A = mat->A,B = mat->B; 1070 int ierr; 1071 PetscReal isend[5],irecv[5]; 1072 1073 PetscFunctionBegin; 1074 info->block_size = 1.0; 1075 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1076 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1077 isend[3] = info->memory; isend[4] = 
info->mallocs; 1078 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1079 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1080 isend[3] += info->memory; isend[4] += info->mallocs; 1081 if (flag == MAT_LOCAL) { 1082 info->nz_used = isend[0]; 1083 info->nz_allocated = isend[1]; 1084 info->nz_unneeded = isend[2]; 1085 info->memory = isend[3]; 1086 info->mallocs = isend[4]; 1087 } else if (flag == MAT_GLOBAL_MAX) { 1088 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1089 info->nz_used = irecv[0]; 1090 info->nz_allocated = irecv[1]; 1091 info->nz_unneeded = irecv[2]; 1092 info->memory = irecv[3]; 1093 info->mallocs = irecv[4]; 1094 } else if (flag == MAT_GLOBAL_SUM) { 1095 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1096 info->nz_used = irecv[0]; 1097 info->nz_allocated = irecv[1]; 1098 info->nz_unneeded = irecv[2]; 1099 info->memory = irecv[3]; 1100 info->mallocs = irecv[4]; 1101 } 1102 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1103 info->fill_ratio_needed = 0; 1104 info->factor_mallocs = 0; 1105 info->rows_global = (double)matin->M; 1106 info->columns_global = (double)matin->N; 1107 info->rows_local = (double)matin->m; 1108 info->columns_local = (double)matin->N; 1109 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatSetOption_MPIAIJ" 1115 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1116 { 1117 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1118 int ierr; 1119 1120 PetscFunctionBegin; 1121 switch (op) { 1122 case MAT_NO_NEW_NONZERO_LOCATIONS: 1123 case MAT_YES_NEW_NONZERO_LOCATIONS: 1124 case MAT_COLUMNS_UNSORTED: 1125 case MAT_COLUMNS_SORTED: 1126 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1127 case MAT_KEEP_ZEROED_ROWS: 1128 case MAT_NEW_NONZERO_LOCATION_ERR: 1129 case MAT_USE_INODES: 1130 case MAT_DO_NOT_USE_INODES: 1131 case MAT_IGNORE_ZERO_ENTRIES: 1132 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1133 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1134 break; 1135 case MAT_ROW_ORIENTED: 1136 a->roworiented = PETSC_TRUE; 1137 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1138 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1139 break; 1140 case MAT_ROWS_SORTED: 1141 case MAT_ROWS_UNSORTED: 1142 case MAT_YES_NEW_DIAGONALS: 1143 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1144 break; 1145 case MAT_COLUMN_ORIENTED: 1146 a->roworiented = PETSC_FALSE; 1147 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1148 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1149 break; 1150 case MAT_IGNORE_OFF_PROC_ENTRIES: 1151 a->donotstash = PETSC_TRUE; 1152 break; 1153 case MAT_NO_NEW_DIAGONALS: 1154 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1155 default: 1156 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1157 } 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatGetRow_MPIAIJ" 1163 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1164 { 1165 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1166 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1167 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1168 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1169 int *cmap,*idx_p; 1170 1171 PetscFunctionBegin; 1172 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1173 mat->getrowactive = PETSC_TRUE; 1174 1175 if (!mat->rowvalues && (idx || v)) { 1176 /* 1177 allocate enough space to hold information from the longest row. 
1178 */ 1179 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1180 int max = 1,tmp; 1181 for (i=0; i<matin->m; i++) { 1182 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1183 if (max < tmp) { max = tmp; } 1184 } 1185 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1186 mat->rowindices = (int*)(mat->rowvalues + max); 1187 } 1188 1189 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1190 lrow = row - rstart; 1191 1192 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1193 if (!v) {pvA = 0; pvB = 0;} 1194 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1195 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1196 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1197 nztot = nzA + nzB; 1198 1199 cmap = mat->garray; 1200 if (v || idx) { 1201 if (nztot) { 1202 /* Sort by increasing column numbers, assuming A and B already sorted */ 1203 int imark = -1; 1204 if (v) { 1205 *v = v_p = mat->rowvalues; 1206 for (i=0; i<nzB; i++) { 1207 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1208 else break; 1209 } 1210 imark = i; 1211 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1212 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1213 } 1214 if (idx) { 1215 *idx = idx_p = mat->rowindices; 1216 if (imark > -1) { 1217 for (i=0; i<imark; i++) { 1218 idx_p[i] = cmap[cworkB[i]]; 1219 } 1220 } else { 1221 for (i=0; i<nzB; i++) { 1222 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1223 else break; 1224 } 1225 imark = i; 1226 } 1227 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1228 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1229 } 1230 } else { 1231 if (idx) *idx = 0; 1232 if (v) *v = 0; 1233 } 1234 } 1235 *nz = nztot; 1236 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1237 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1243 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1244 { 1245 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1246 1247 PetscFunctionBegin; 1248 if (aij->getrowactive == PETSC_FALSE) { 1249 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1250 } 1251 aij->getrowactive = PETSC_FALSE; 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatNorm_MPIAIJ" 1257 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1258 { 1259 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1260 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1261 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1262 PetscReal sum = 0.0; 1263 PetscScalar *v; 1264 1265 PetscFunctionBegin; 1266 if (aij->size == 1) { 1267 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1268 } else { 1269 if (type == NORM_FROBENIUS) { 1270 v = amat->a; 1271 for (i=0; i<amat->nz; i++) { 1272 #if defined(PETSC_USE_COMPLEX) 1273 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1274 #else 1275 sum += (*v)*(*v); v++; 1276 #endif 1277 } 1278 v = bmat->a; 1279 for (i=0; i<bmat->nz; i++) { 1280 #if defined(PETSC_USE_COMPLEX) 1281 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1282 #else 1283 sum += (*v)*(*v); v++; 1284 #endif 1285 } 1286 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1287 *norm = sqrt(*norm); 1288 } else if (type == NORM_1) { /* max column norm */ 1289 
PetscReal *tmp,*tmp2; 1290 int *jj,*garray = aij->garray; 1291 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1292 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1293 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1294 *norm = 0.0; 1295 v = amat->a; jj = amat->j; 1296 for (j=0; j<amat->nz; j++) { 1297 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1298 } 1299 v = bmat->a; jj = bmat->j; 1300 for (j=0; j<bmat->nz; j++) { 1301 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1302 } 1303 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1304 for (j=0; j<mat->N; j++) { 1305 if (tmp2[j] > *norm) *norm = tmp2[j]; 1306 } 1307 ierr = PetscFree(tmp);CHKERRQ(ierr); 1308 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1309 } else if (type == NORM_INFINITY) { /* max row norm */ 1310 PetscReal ntemp = 0.0; 1311 for (j=0; j<aij->A->m; j++) { 1312 v = amat->a + amat->i[j] + shift; 1313 sum = 0.0; 1314 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1315 sum += PetscAbsScalar(*v); v++; 1316 } 1317 v = bmat->a + bmat->i[j] + shift; 1318 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1319 sum += PetscAbsScalar(*v); v++; 1320 } 1321 if (sum > ntemp) ntemp = sum; 1322 } 1323 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1324 } else { 1325 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1326 } 1327 } 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatTranspose_MPIAIJ" 1333 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1334 { 1335 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1336 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1337 int ierr,shift = Aloc->indexshift; 1338 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1339 Mat B; 1340 PetscScalar *array; 1341 1342 PetscFunctionBegin; 1343 if (!matout && M != N) { 1344 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1345 } 1346 1347 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1348 1349 /* copy over the A part */ 1350 Aloc = (Mat_SeqAIJ*)a->A->data; 1351 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1352 row = a->rstart; 1353 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1354 for (i=0; i<m; i++) { 1355 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1356 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1357 } 1358 aj = Aloc->j; 1359 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1360 1361 /* copy over the B part */ 1362 Aloc = (Mat_SeqAIJ*)a->B->data; 1363 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1364 row = a->rstart; 1365 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1366 ct = cols; 1367 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1368 for (i=0; i<m; i++) { 1369 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1370 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1371 } 1372 ierr = PetscFree(ct);CHKERRQ(ierr); 1373 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1374 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 if (matout) { 1376 *matout = B; 1377 } else { 1378 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1385 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1386 { 1387 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1388 Mat a = aij->A,b 
= aij->B; 1389 int ierr,s1,s2,s3; 1390 1391 PetscFunctionBegin; 1392 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1393 if (rr) { 1394 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1395 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1396 /* Overlap communication with computation. */ 1397 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1398 } 1399 if (ll) { 1400 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1401 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1402 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1403 } 1404 /* scale the diagonal block */ 1405 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1406 1407 if (rr) { 1408 /* Do a scatter end and then right scale the off-diagonal block */ 1409 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1410 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1411 } 1412 1413 PetscFunctionReturn(0); 1414 } 1415 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1419 int MatPrintHelp_MPIAIJ(Mat A) 1420 { 1421 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1422 int ierr; 1423 1424 PetscFunctionBegin; 1425 if (!a->rank) { 1426 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1427 } 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1433 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1434 { 1435 PetscFunctionBegin; 1436 *bs = 1; 1437 PetscFunctionReturn(0); 1438 } 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1441 int MatSetUnfactored_MPIAIJ(Mat A) 1442 { 1443 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1444 int ierr; 1445 1446 PetscFunctionBegin; 1447 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatEqual_MPIAIJ" 1453 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1454 { 1455 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1456 Mat a,b,c,d; 1457 PetscTruth flg; 1458 int ierr; 1459 1460 PetscFunctionBegin; 1461 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1462 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1463 a = matA->A; b = matA->B; 1464 c = matB->A; d = matB->B; 1465 1466 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1467 if (flg == PETSC_TRUE) { 1468 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1469 } 1470 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatCopy_MPIAIJ" 1476 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1477 { 1478 int ierr; 1479 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1480 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1481 PetscTruth flg; 1482 1483 PetscFunctionBegin; 1484 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1485 if (str != SAME_NONZERO_PATTERN || !flg) { 1486 /* because of the column compression in the off-processor part of the matrix a->B, 1487 the number of columns in a->B and b->B may be different, hence we cannot call 1488 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1489 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1490 then copying the submatrices */ 1491 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1492 } else { 1493 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1494 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1501 int MatSetUpPreallocation_MPIAIJ(Mat A) 1502 { 1503 int ierr; 1504 1505 PetscFunctionBegin; 1506 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1507 PetscFunctionReturn(0); 1508 } 1509 1510 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1511 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1512 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1513 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1514 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1515 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1516 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1517 #endif 1518 1519 #include "petscblaslapack.h" 1520 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatAXPY_MPIAIJ" 1523 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1524 { 1525 int ierr,one=1,i; 1526 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1527 Mat_SeqAIJ *x,*y; 1528 1529 PetscFunctionBegin; 1530 if (str == SAME_NONZERO_PATTERN) { 1531 x = (Mat_SeqAIJ *)xx->A->data; 1532 y = (Mat_SeqAIJ *)yy->A->data; 1533 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1534 x = (Mat_SeqAIJ *)xx->B->data; 1535 y = (Mat_SeqAIJ *)yy->B->data; 1536 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1537 } else if (str == SUBSET_NONZERO_PATTERN) { 1538 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1539 1540 x = (Mat_SeqAIJ *)xx->B->data; 1541 y = (Mat_SeqAIJ *)yy->B->data; 1542 if (y->xtoy && y->XtoY != xx->B) { 1543 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1544 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1545 } 1546 if (!y->xtoy) { /* get xtoy */ 1547 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1548 y->XtoY = xx->B; 1549 } 1550 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1551 } else { 1552 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 /* -------------------------------------------------------------------*/ 1558 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1559 MatGetRow_MPIAIJ, 1560 MatRestoreRow_MPIAIJ, 1561 MatMult_MPIAIJ, 1562 MatMultAdd_MPIAIJ, 1563 MatMultTranspose_MPIAIJ, 1564 MatMultTransposeAdd_MPIAIJ, 1565 0, 1566 0, 1567 0, 1568 0, 1569 0, 1570 0, 1571 MatRelax_MPIAIJ, 1572 MatTranspose_MPIAIJ, 1573 MatGetInfo_MPIAIJ, 1574 MatEqual_MPIAIJ, 1575 MatGetDiagonal_MPIAIJ, 1576 MatDiagonalScale_MPIAIJ, 1577 MatNorm_MPIAIJ, 1578 MatAssemblyBegin_MPIAIJ, 1579 MatAssemblyEnd_MPIAIJ, 1580 0, 1581 MatSetOption_MPIAIJ, 1582 MatZeroEntries_MPIAIJ, 1583 MatZeroRows_MPIAIJ, 1584 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1585 MatLUFactorSymbolic_MPIAIJ_TFS, 1586 #else 1587 0, 1588 #endif 1589 0, 1590 0, 1591 0, 1592 MatSetUpPreallocation_MPIAIJ, 1593 0, 1594 0, 1595 0, 1596 0, 1597 MatDuplicate_MPIAIJ, 1598 0, 1599 0, 1600 0, 1601 0, 1602 MatAXPY_MPIAIJ, 1603 MatGetSubMatrices_MPIAIJ, 1604 
MatIncreaseOverlap_MPIAIJ, 1605 MatGetValues_MPIAIJ, 1606 MatCopy_MPIAIJ, 1607 MatPrintHelp_MPIAIJ, 1608 MatScale_MPIAIJ, 1609 0, 1610 0, 1611 0, 1612 MatGetBlockSize_MPIAIJ, 1613 0, 1614 0, 1615 0, 1616 0, 1617 MatFDColoringCreate_MPIAIJ, 1618 0, 1619 MatSetUnfactored_MPIAIJ, 1620 0, 1621 0, 1622 MatGetSubMatrix_MPIAIJ, 1623 MatDestroy_MPIAIJ, 1624 MatView_MPIAIJ, 1625 MatGetPetscMaps_Petsc, 1626 0, 1627 0, 1628 0, 1629 0, 1630 0, 1631 0, 1632 0, 1633 0, 1634 MatSetColoring_MPIAIJ, 1635 MatSetValuesAdic_MPIAIJ, 1636 MatSetValuesAdifor_MPIAIJ 1637 }; 1638 1639 /* ----------------------------------------------------------------------------------------*/ 1640 1641 EXTERN_C_BEGIN 1642 #undef __FUNCT__ 1643 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1644 int MatStoreValues_MPIAIJ(Mat mat) 1645 { 1646 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1647 int ierr; 1648 1649 PetscFunctionBegin; 1650 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1651 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 EXTERN_C_END 1655 1656 EXTERN_C_BEGIN 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1659 int MatRetrieveValues_MPIAIJ(Mat mat) 1660 { 1661 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1662 int ierr; 1663 1664 PetscFunctionBegin; 1665 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1666 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 EXTERN_C_END 1670 1671 #include "petscpc.h" 1672 EXTERN_C_BEGIN 1673 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1674 EXTERN_C_END 1675 1676 EXTERN_C_BEGIN 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatCreate_MPIAIJ" 1679 int MatCreate_MPIAIJ(Mat B) 1680 { 1681 Mat_MPIAIJ *b; 1682 int ierr,i,size; 1683 1684 PetscFunctionBegin; 1685 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1686 1687 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1688 B->data = (void*)b; 1689 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1690 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1691 B->factor = 0; 1692 B->assembled = PETSC_FALSE; 1693 B->mapping = 0; 1694 1695 B->insertmode = NOT_SET_VALUES; 1696 b->size = size; 1697 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1698 1699 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1700 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1701 1702 /* the information in the maps duplicates the information computed below, eventually 1703 we should remove the duplicate information that is not contained in the maps */ 1704 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1705 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1706 1707 /* build local table of row and column ownerships */ 1708 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1709 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1710 b->cowners = b->rowners + b->size + 2; 1711 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1712 b->rowners[0] = 0; 1713 for (i=2; i<=b->size; i++) { 1714 b->rowners[i] += b->rowners[i-1]; 1715 } 1716 b->rstart = b->rowners[b->rank]; 1717 b->rend = b->rowners[b->rank+1]; 1718 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1719 b->cowners[0] = 0; 1720 for (i=2; i<=b->size; i++) { 1721 b->cowners[i] += b->cowners[i-1]; 1722 } 1723 b->cstart = b->cowners[b->rank]; 1724 b->cend = 
b->cowners[b->rank+1]; 1725 1726 /* build cache for off array entries formed */ 1727 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1728 b->donotstash = PETSC_FALSE; 1729 b->colmap = 0; 1730 b->garray = 0; 1731 b->roworiented = PETSC_TRUE; 1732 1733 /* stuff used for matrix vector multiply */ 1734 b->lvec = PETSC_NULL; 1735 b->Mvctx = PETSC_NULL; 1736 1737 /* stuff for MatGetRow() */ 1738 b->rowindices = 0; 1739 b->rowvalues = 0; 1740 b->getrowactive = PETSC_FALSE; 1741 1742 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1743 "MatStoreValues_MPIAIJ", 1744 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1745 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1746 "MatRetrieveValues_MPIAIJ", 1747 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1748 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1749 "MatGetDiagonalBlock_MPIAIJ", 1750 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1751 1752 PetscFunctionReturn(0); 1753 } 1754 EXTERN_C_END 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1758 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1759 { 1760 Mat mat; 1761 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1762 int ierr; 1763 1764 PetscFunctionBegin; 1765 *newmat = 0; 1766 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1767 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1768 a = (Mat_MPIAIJ*)mat->data; 1769 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1770 mat->factor = matin->factor; 1771 mat->assembled = PETSC_TRUE; 1772 mat->insertmode = NOT_SET_VALUES; 1773 mat->preallocated = PETSC_TRUE; 1774 1775 a->rstart = oldmat->rstart; 1776 a->rend = oldmat->rend; 1777 a->cstart = oldmat->cstart; 1778 a->cend = oldmat->cend; 1779 a->size = oldmat->size; 1780 a->rank = oldmat->rank; 1781 a->donotstash = oldmat->donotstash; 1782 a->roworiented = oldmat->roworiented; 1783 a->rowindices = 0; 1784 a->rowvalues = 0; 1785 a->getrowactive = PETSC_FALSE; 1786 1787 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1788 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1789 if (oldmat->colmap) { 1790 #if defined (PETSC_USE_CTABLE) 1791 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1792 #else 1793 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1794 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1795 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1796 #endif 1797 } else a->colmap = 0; 1798 if (oldmat->garray) { 1799 int len; 1800 len = oldmat->B->n; 1801 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1802 PetscLogObjectMemory(mat,len*sizeof(int)); 1803 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1804 } else a->garray = 0; 1805 1806 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1807 PetscLogObjectParent(mat,a->lvec); 1808 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1809 PetscLogObjectParent(mat,a->Mvctx); 1810 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1811 PetscLogObjectParent(mat,a->A); 1812 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1813 PetscLogObjectParent(mat,a->B); 1814 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1815 *newmat = mat; 1816 PetscFunctionReturn(0); 1817 } 1818 1819 #include "petscsys.h" 
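/*
   A minimal usage sketch for the binary loader implemented below (MatLoad with the MATMPIAIJ type).
   It assumes the matrix was previously written with MatView() on a binary viewer; the file name
   "matrix.dat" is only an illustrative placeholder.

      Mat         A;
      PetscViewer fd;
      int         ierr;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&fd);CHKERRQ(ierr);
      ierr = MatLoad(fd,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
      ... use A, for example in a linear solve ...
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/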
1820 1821 EXTERN_C_BEGIN 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatLoad_MPIAIJ" 1824 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1825 { 1826 Mat A; 1827 PetscScalar *vals,*svals; 1828 MPI_Comm comm = ((PetscObject)viewer)->comm; 1829 MPI_Status status; 1830 int i,nz,ierr,j,rstart,rend,fd; 1831 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1832 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1833 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1834 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) 1835 PetscTruth flag; 1836 #endif 1837 1838 PetscFunctionBegin; 1839 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1840 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1841 if (!rank) { 1842 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1843 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1844 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1845 if (header[3] < 0) { 1846 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1847 } 1848 } 1849 1850 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1851 M = header[1]; N = header[2]; 1852 /* determine ownership of all rows */ 1853 m = M/size + ((M % size) > rank); 1854 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1855 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1856 rowners[0] = 0; 1857 for (i=2; i<=size; i++) { 1858 rowners[i] += rowners[i-1]; 1859 } 1860 rstart = rowners[rank]; 1861 rend = rowners[rank+1]; 1862 1863 /* distribute row lengths to all processors */ 1864 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1865 offlens = ourlens + (rend-rstart); 1866 if (!rank) { 1867 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1868 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1869 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1870 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1871 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1872 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1873 } else { 1874 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1875 } 1876 1877 if (!rank) { 1878 /* calculate the number of nonzeros on each processor */ 1879 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1880 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1881 for (i=0; i<size; i++) { 1882 for (j=rowners[i]; j< rowners[i+1]; j++) { 1883 procsnz[i] += rowlengths[j]; 1884 } 1885 } 1886 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1887 1888 /* determine max buffer needed and allocate it */ 1889 maxnz = 0; 1890 for (i=0; i<size; i++) { 1891 maxnz = PetscMax(maxnz,procsnz[i]); 1892 } 1893 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1894 1895 /* read in my part of the matrix column indices */ 1896 nz = procsnz[0]; 1897 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1898 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1899 1900 /* read in every one elses and ship off */ 1901 for (i=1; i<size; i++) { 1902 nz = procsnz[i]; 1903 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1904 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1905 } 1906 ierr = PetscFree(cols);CHKERRQ(ierr); 1907 } else { 1908 /* determine buffer space needed for 
message */ 1909 nz = 0; 1910 for (i=0; i<m; i++) { 1911 nz += ourlens[i]; 1912 } 1913 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1914 1915 /* receive message of column indices*/ 1916 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1917 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1918 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1919 } 1920 1921 /* determine column ownership if matrix is not square */ 1922 if (N != M) { 1923 n = N/size + ((N % size) > rank); 1924 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1925 cstart = cend - n; 1926 } else { 1927 cstart = rstart; 1928 cend = rend; 1929 n = cend - cstart; 1930 } 1931 1932 /* loop over local rows, determining number of off diagonal entries */ 1933 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1934 jj = 0; 1935 for (i=0; i<m; i++) { 1936 for (j=0; j<ourlens[i]; j++) { 1937 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1938 jj++; 1939 } 1940 } 1941 1942 /* create our matrix */ 1943 for (i=0; i<m; i++) { 1944 ourlens[i] -= offlens[i]; 1945 } 1946 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1947 A = *newmat; 1948 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1949 for (i=0; i<m; i++) { 1950 ourlens[i] += offlens[i]; 1951 } 1952 1953 if (!rank) { 1954 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1955 1956 /* read in my part of the matrix numerical values */ 1957 nz = procsnz[0]; 1958 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1959 1960 /* insert into matrix */ 1961 jj = rstart; 1962 smycols = mycols; 1963 svals = vals; 1964 for (i=0; i<m; i++) { 1965 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1966 smycols += ourlens[i]; 1967 svals += ourlens[i]; 1968 jj++; 1969 } 1970 1971 /* read in other processors and ship out */ 1972 for (i=1; i<size; i++) { 1973 nz = procsnz[i]; 1974 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1975 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1976 } 1977 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1978 } else { 1979 /* receive numeric values */ 1980 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1981 1982 /* receive message of values*/ 1983 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1984 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1985 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1986 1987 /* insert into matrix */ 1988 jj = rstart; 1989 smycols = mycols; 1990 svals = vals; 1991 for (i=0; i<m; i++) { 1992 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1993 smycols += ourlens[i]; 1994 svals += ourlens[i]; 1995 jj++; 1996 } 1997 } 1998 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1999 ierr = PetscFree(vals);CHKERRQ(ierr); 2000 ierr = PetscFree(mycols);CHKERRQ(ierr); 2001 ierr = PetscFree(rowners);CHKERRQ(ierr); 2002 2003 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2004 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2005 #if defined(PETSC_HAVE_SPOOLES) 2006 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2007 if (flag) { 2008 if (size == 1) { 2009 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2010 } else { 2011 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2012 } 2013 } 2014 #endif 2015 #if defined(PETSC_HAVE_SUPERLUDIST) 2016 
ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2017 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2018 #endif 2019 PetscFunctionReturn(0); 2020 } 2021 EXTERN_C_END 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2025 /* 2026 Not great since it makes two copies of the submatrix, first an SeqAIJ 2027 in local and then by concatenating the local matrices the end result. 2028 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2029 */ 2030 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2031 { 2032 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2033 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2034 Mat *local,M,Mreuse; 2035 PetscScalar *vwork,*aa; 2036 MPI_Comm comm = mat->comm; 2037 Mat_SeqAIJ *aij; 2038 2039 2040 PetscFunctionBegin; 2041 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2042 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2043 2044 if (call == MAT_REUSE_MATRIX) { 2045 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2046 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2047 local = &Mreuse; 2048 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2049 } else { 2050 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2051 Mreuse = *local; 2052 ierr = PetscFree(local);CHKERRQ(ierr); 2053 } 2054 2055 /* 2056 m - number of local rows 2057 n - number of columns (same on all processors) 2058 rstart - first row in new global matrix generated 2059 */ 2060 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2061 if (call == MAT_INITIAL_MATRIX) { 2062 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2063 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2064 ii = aij->i; 2065 jj = aij->j; 2066 2067 /* 2068 Determine the number of non-zeros in the diagonal and off-diagonal 2069 portions of the matrix in order to do correct preallocation 2070 */ 2071 2072 /* first get start and end of "diagonal" columns */ 2073 if (csize == PETSC_DECIDE) { 2074 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2075 if (mglobal == n) { /* square matrix */ 2076 nlocal = m; 2077 } else { 2078 nlocal = n/size + ((n % size) > rank); 2079 } 2080 } else { 2081 nlocal = csize; 2082 } 2083 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2084 rstart = rend - nlocal; 2085 if (rank == size - 1 && rend != n) { 2086 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 2087 } 2088 2089 /* next, compute all the lengths */ 2090 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2091 olens = dlens + m; 2092 for (i=0; i<m; i++) { 2093 jend = ii[i+1] - ii[i]; 2094 olen = 0; 2095 dlen = 0; 2096 for (j=0; j<jend; j++) { 2097 if (*jj < rstart || *jj >= rend) olen++; 2098 else dlen++; 2099 jj++; 2100 } 2101 olens[i] = olen; 2102 dlens[i] = dlen; 2103 } 2104 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2105 ierr = PetscFree(dlens);CHKERRQ(ierr); 2106 } else { 2107 int ml,nl; 2108 2109 M = *newmat; 2110 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2111 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2112 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2113 /* 2114 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2115 rather than the slower 
MatSetValues(). 2116 */ 2117 M->was_assembled = PETSC_TRUE; 2118 M->assembled = PETSC_FALSE; 2119 } 2120 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2121 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2122 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2123 ii = aij->i; 2124 jj = aij->j; 2125 aa = aij->a; 2126 for (i=0; i<m; i++) { 2127 row = rstart + i; 2128 nz = ii[i+1] - ii[i]; 2129 cwork = jj; jj += nz; 2130 vwork = aa; aa += nz; 2131 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2132 } 2133 2134 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2135 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2136 *newmat = M; 2137 2138 /* save submatrix used in processor for next request */ 2139 if (call == MAT_INITIAL_MATRIX) { 2140 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2141 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2142 } 2143 2144 PetscFunctionReturn(0); 2145 } 2146 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2149 /*@C 2150 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format 2151 (the default parallel PETSc format). For good matrix assembly performance 2152 the user should preallocate the matrix storage by setting the parameters 2153 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2154 performance can be increased by more than a factor of 50. 2155 2156 Collective on MPI_Comm 2157 2158 Input Parameters: 2159 + A - the matrix 2160 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2161 (same value is used for all local rows) 2162 . d_nnz - array containing the number of nonzeros in the various rows of the 2163 DIAGONAL portion of the local submatrix (possibly different for each row) 2164 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2165 The size of this array is equal to the number of local rows, i.e., 'm'. 2166 You must leave room for the diagonal entry even if it is zero. 2167 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2168 submatrix (same value is used for all local rows). 2169 - o_nnz - array containing the number of nonzeros in the various rows of the 2170 OFF-DIAGONAL portion of the local submatrix (possibly different for 2171 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2172 structure. The size of this array is equal to the number 2173 of local rows, i.e., 'm'. 2174 2175 The AIJ format (also called the Yale sparse matrix format or 2176 compressed row storage), is fully compatible with standard Fortran 77 2177 storage. That is, the stored row and column indices can begin at 2178 either one (as in Fortran) or zero. See the users manual for details. 2179 2180 The user MUST specify either the local or global matrix dimensions 2181 (possibly both). 2182 2183 The parallel matrix is partitioned such that the first m0 rows belong to 2184 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2185 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2186 2187 The DIAGONAL portion of the local submatrix of a processor can be defined 2188 as the submatrix which is obtained by extracting the part corresponding 2189 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2190 first row that belongs to the processor, and r2 is the last row belonging 2191 to this processor. This is a square mxm matrix.
The remaining portion 2192 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2193 2194 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2195 2196 By default, this format uses inodes (identical nodes) when possible. 2197 We search for consecutive rows with the same nonzero structure, thereby 2198 reusing matrix information to achieve increased efficiency. 2199 2200 Options Database Keys: 2201 + -mat_aij_no_inode - Do not use inodes 2202 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2203 - -mat_aij_oneindex - Internally use indexing starting at 1 2204 rather than 0. Note that when calling MatSetValues(), 2205 the user still MUST index entries starting at 0! 2206 2207 Example usage: 2208 2209 Consider the following 8x8 matrix with 34 non-zero values, that is 2210 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2211 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2212 as follows: 2213 2214 .vb 2215 1 2 0 | 0 3 0 | 0 4 2216 Proc0 0 5 6 | 7 0 0 | 8 0 2217 9 0 10 | 11 0 0 | 12 0 2218 ------------------------------------- 2219 13 0 14 | 15 16 17 | 0 0 2220 Proc1 0 18 0 | 19 20 21 | 0 0 2221 0 0 0 | 22 23 0 | 24 0 2222 ------------------------------------- 2223 Proc2 25 26 27 | 0 0 28 | 29 0 2224 30 0 0 | 31 32 33 | 0 34 2225 .ve 2226 2227 This can be represented as a collection of submatrices as: 2228 2229 .vb 2230 A B C 2231 D E F 2232 G H I 2233 .ve 2234 2235 Where the submatrices A,B,C are owned by proc0, D,E,F are 2236 owned by proc1, G,H,I are owned by proc2. 2237 2238 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2239 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2240 The 'M','N' parameters are 8,8, and have the same values on all procs. 2241 2242 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2243 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2244 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2245 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2246 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2247 matrix, and [DF] as another SeqAIJ matrix. 2248 2249 When d_nz, o_nz parameters are specified, d_nz storage elements are 2250 allocated for every row of the local diagonal submatrix, and o_nz 2251 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2252 One way to choose d_nz and o_nz is to use the max nonzeros per local 2253 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2254 In this case, the values of d_nz,o_nz are: 2255 .vb 2256 proc0 : d_nz = 2, o_nz = 2 2257 proc1 : d_nz = 3, o_nz = 2 2258 proc2 : d_nz = 1, o_nz = 4 2259 .ve 2260 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2261 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2262 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2263 34 values. 2264 2265 When d_nnz, o_nnz parameters are specified, the storage is specified 2266 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2267 In the above case the values for d_nnz,o_nnz are: 2268 .vb 2269 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2270 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2271 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2272 .ve 2273 Here the space allocated is the sum of all the above values, i.e., 34, and 2274 hence pre-allocation is perfect.
2275 2276 Level: intermediate 2277 2278 .keywords: matrix, aij, compressed row, sparse, parallel 2279 2280 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2281 @*/ 2282 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2283 { 2284 Mat_MPIAIJ *b; 2285 int ierr,i; 2286 PetscTruth flg2; 2287 2288 PetscFunctionBegin; 2289 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2290 if (!flg2) PetscFunctionReturn(0); 2291 B->preallocated = PETSC_TRUE; 2292 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2293 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2294 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2295 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2296 if (d_nnz) { 2297 for (i=0; i<B->m; i++) { 2298 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2299 } 2300 } 2301 if (o_nnz) { 2302 for (i=0; i<B->m; i++) { 2303 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2304 } 2305 } 2306 b = (Mat_MPIAIJ*)B->data; 2307 2308 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2309 PetscLogObjectParent(B,b->A); 2310 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2311 PetscLogObjectParent(B,b->B); 2312 2313 PetscFunctionReturn(0); 2314 } 2315 2316 #undef __FUNCT__ 2317 #define __FUNCT__ "MatCreateMPIAIJ" 2318 /*@C 2319 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2320 (the default parallel PETSc format). For good matrix assembly performance 2321 the user should preallocate the matrix storage by setting the parameters 2322 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2323 performance can be increased by more than a factor of 50. 2324 2325 Collective on MPI_Comm 2326 2327 Input Parameters: 2328 + comm - MPI communicator 2329 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2330 This value should be the same as the local size used in creating the 2331 y vector for the matrix-vector product y = Ax. 2332 . n - This value should be the same as the local size used in creating the 2333 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2334 calculated if N is given) For square matrices n is almost always m. 2335 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2336 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2337 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2338 (same value is used for all local rows) 2339 . d_nnz - array containing the number of nonzeros in the various rows of the 2340 DIAGONAL portion of the local submatrix (possibly different for each row) 2341 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2342 The size of this array is equal to the number of local rows, i.e 'm'. 2343 You must leave room for the diagonal entry even if it is zero. 2344 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2345 submatrix (same value is used for all local rows). 2346 - o_nnz - array containing the number of nonzeros in the various rows of the 2347 OFF-DIAGONAL portion of the local submatrix (possibly different for 2348 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2349 structure. 
The size of this array is equal to the number 2350 of local rows, i.e., 'm'. 2351 2352 Output Parameter: 2353 . A - the matrix 2354 2355 Notes: 2356 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2357 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2358 storage requirements for this matrix. 2359 2360 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2361 processor then it must be used on all processors that share the object for 2362 that argument. 2363 2364 The AIJ format (also called the Yale sparse matrix format or 2365 compressed row storage), is fully compatible with standard Fortran 77 2366 storage. That is, the stored row and column indices can begin at 2367 either one (as in Fortran) or zero. See the users manual for details. 2368 2369 The user MUST specify either the local or global matrix dimensions 2370 (possibly both). 2371 2372 The parallel matrix is partitioned such that the first m0 rows belong to 2373 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2374 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2375 2376 The DIAGONAL portion of the local submatrix of a processor can be defined 2377 as the submatrix which is obtained by extracting the part corresponding 2378 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2379 first row that belongs to the processor, and r2 is the last row belonging 2380 to this processor. This is a square mxm matrix. The remaining portion 2381 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2382 2383 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2384 2385 By default, this format uses inodes (identical nodes) when possible. 2386 We search for consecutive rows with the same nonzero structure, thereby 2387 reusing matrix information to achieve increased efficiency. 2388 2389 Options Database Keys: 2390 + -mat_aij_no_inode - Do not use inodes 2391 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2392 - -mat_aij_oneindex - Internally use indexing starting at 1 2393 rather than 0. Note that when calling MatSetValues(), 2394 the user still MUST index entries starting at 0! 2395 2396 2397 Example usage: 2398 2399 Consider the following 8x8 matrix with 34 non-zero values, that is 2400 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2401 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2402 as follows: 2403 2404 .vb 2405 1 2 0 | 0 3 0 | 0 4 2406 Proc0 0 5 6 | 7 0 0 | 8 0 2407 9 0 10 | 11 0 0 | 12 0 2408 ------------------------------------- 2409 13 0 14 | 15 16 17 | 0 0 2410 Proc1 0 18 0 | 19 20 21 | 0 0 2411 0 0 0 | 22 23 0 | 24 0 2412 ------------------------------------- 2413 Proc2 25 26 27 | 0 0 28 | 29 0 2414 30 0 0 | 31 32 33 | 0 34 2415 .ve 2416 2417 This can be represented as a collection of submatrices as: 2418 2419 .vb 2420 A B C 2421 D E F 2422 G H I 2423 .ve 2424 2425 Where the submatrices A,B,C are owned by proc0, D,E,F are 2426 owned by proc1, G,H,I are owned by proc2. 2427 2428 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2429 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2430 The 'M','N' parameters are 8,8, and have the same values on all procs. 2431 2432 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2433 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2434 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2435 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2436 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2437 matrix, and [DF] as another SeqAIJ matrix. 2438 2439 When d_nz, o_nz parameters are specified, d_nz storage elements are 2440 allocated for every row of the local diagonal submatrix, and o_nz 2441 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2442 One way to choose d_nz and o_nz is to use the max nonzeros per local 2443 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2444 In this case, the values of d_nz,o_nz are: 2445 .vb 2446 proc0 : d_nz = 2, o_nz = 2 2447 proc1 : d_nz = 3, o_nz = 2 2448 proc2 : d_nz = 1, o_nz = 4 2449 .ve 2450 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2451 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2452 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2453 34 values. 2454 2455 When d_nnz, o_nnz parameters are specified, the storage is specified 2456 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2457 In the above case the values for d_nnz,o_nnz are: 2458 .vb 2459 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2460 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2461 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2462 .ve 2463 Here the space allocated is the sum of all the above values, i.e., 34, and 2464 hence pre-allocation is perfect. 2465 2466 Level: intermediate 2467 2468 .keywords: matrix, aij, compressed row, sparse, parallel 2469 2470 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2471 @*/ 2472 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2473 { 2474 int ierr,size; 2475 2476 PetscFunctionBegin; 2477 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2478 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2479 if (size > 1) { 2480 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2481 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2482 } else { 2483 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2484 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2485 } 2486 PetscFunctionReturn(0); 2487 } 2488 2489 #undef __FUNCT__ 2490 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2491 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2492 { 2493 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2494 PetscFunctionBegin; 2495 *Ad = a->A; 2496 *Ao = a->B; 2497 *colmap = a->garray; 2498 PetscFunctionReturn(0); 2499 } 2500 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2503 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2504 { 2505 int ierr,i; 2506 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2507 2508 PetscFunctionBegin; 2509 if (coloring->ctype == IS_COLORING_LOCAL) { 2510 ISColoringValue *allcolors,*colors; 2511 ISColoring ocoloring; 2512 2513 /* set coloring for diagonal portion */ 2514 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2515 2516 /* set coloring for off-diagonal portion */ 2517 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2518 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2519 for (i=0; i<a->B->n; i++) { 2520 colors[i] = allcolors[a->garray[i]]; 2521 } 2522 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2523 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2524 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2525 ierr =
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2526 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2527 ISColoringValue *colors; 2528 int *larray; 2529 ISColoring ocoloring; 2530 2531 /* set coloring for diagonal portion */ 2532 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2533 for (i=0; i<a->A->n; i++) { 2534 larray[i] = i + a->cstart; 2535 } 2536 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2537 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2538 for (i=0; i<a->A->n; i++) { 2539 colors[i] = coloring->colors[larray[i]]; 2540 } 2541 ierr = PetscFree(larray);CHKERRQ(ierr); 2542 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2543 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2544 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2545 2546 /* set coloring for off-diagonal portion */ 2547 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2548 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2549 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2550 for (i=0; i<a->B->n; i++) { 2551 colors[i] = coloring->colors[larray[i]]; 2552 } 2553 ierr = PetscFree(larray);CHKERRQ(ierr); 2554 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2555 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2556 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2557 } else { 2558 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype); 2559 } 2560 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2566 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2567 { 2568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2569 int ierr; 2570 2571 PetscFunctionBegin; 2572 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2573 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 #undef __FUNCT__ 2578 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2579 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2580 { 2581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2582 int ierr; 2583 2584 PetscFunctionBegin; 2585 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2586 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatMerge" 2592 /*@C 2593 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2594 matrices from each processor 2595 2596 Collective on MPI_Comm 2597 2598 Input Parameters: 2599 + comm - the communicator the parallel matrix will live on 2600 - inmat - the input sequential matrices 2601 2602 Output Parameter: 2603 . outmat - the parallel matrix generated 2604 2605 Level: advanced 2606 2607 Notes: The number of columns of the matrix in EACH of the separate files 2608 MUST be the same.
2609 2610 @*/ 2611 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2612 { 2613 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2614 PetscScalar *values; 2615 PetscMap columnmap,rowmap; 2616 2617 PetscFunctionBegin; 2618 2619 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2620 2621 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2622 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2623 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2624 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2625 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2626 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2627 2628 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2629 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2630 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2631 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2632 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2633 2634 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2635 for (i=0;i<m;i++) { 2636 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2637 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2638 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2639 } 2640 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2641 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2642 2643 for (i=0;i<m;i++) { 2644 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2645 I = i + rstart; 2646 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2647 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2648 } 2649 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2650 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2651 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2652 2653 PetscFunctionReturn(0); 2654 } 2655 2656 #undef __FUNCT__ 2657 #define __FUNCT__ "MatFileSplit" 2658 int MatFileSplit(Mat A,char *outfile) 2659 { 2660 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2661 PetscViewer out; 2662 char *name; 2663 Mat B; 2664 PetscScalar *values; 2665 2666 PetscFunctionBegin; 2667 2668 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2669 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2670 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2671 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2672 for (i=0;i<m;i++) { 2673 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2674 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2675 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2676 } 2677 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2678 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679 2680 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2681 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2682 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2683 sprintf(name,"%s.%d",outfile,rank); 2684 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2685 ierr = PetscFree(name); 2686 ierr = MatView(B,out);CHKERRQ(ierr); 2687 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2688 ierr = MatDestroy(B);CHKERRQ(ierr); 2689 PetscFunctionReturn(0); 2690 } 2691
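/*
   A minimal assembly sketch for the MatCreateMPIAIJ()/MatMPIAIJSetPreallocation() interface
   documented above; the global size 8, the per-row preallocation values, and the single
   diagonal entry inserted per row are only illustrative assumptions.

      Mat         A;
      int         ierr,i,rstart,rend;
      PetscScalar one = 1.0;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/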