1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 int CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 int n = aij->B->n,i,ierr; 18 19 PetscFunctionBegin; 20 #if defined (PETSC_USE_CTABLE) 21 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 22 for (i=0; i<n; i++){ 23 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 24 } 25 #else 26 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 27 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 28 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 29 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 30 #endif 31 PetscFunctionReturn(0); 32 } 33 34 #define CHUNKSIZE 15 35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 36 { \ 37 \ 38 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 39 rmax = aimax[row]; nrow = ailen[row]; \ 40 col1 = col - shift; \ 41 \ 42 low = 0; high = nrow; \ 43 while (high-low > 5) { \ 44 t = (low+high)/2; \ 45 if (rp[t] > col) high = t; \ 46 else low = t; \ 47 } \ 48 for (_i=low; _i<high; _i++) { \ 49 if (rp[_i] > col1) break; \ 50 if (rp[_i] == col1) { \ 51 if (addv == ADD_VALUES) ap[_i] += value; \ 52 else ap[_i] = value; \ 53 goto a_noinsert; \ 54 } \ 55 } \ 56 if (nonew == 1) goto a_noinsert; \ 57 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 58 if (nrow >= rmax) { \ 59 /* there is no extra room in row, therefore enlarge */ \ 60 int new_nz = ai[am] + 
CHUNKSIZE,len,*new_i,*new_j; \ 61 PetscScalar *new_a; \ 62 \ 63 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 64 \ 65 /* malloc new storage space */ \ 66 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 67 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 68 new_j = (int*)(new_a + new_nz); \ 69 new_i = new_j + new_nz; \ 70 \ 71 /* copy over old data into new slots */ \ 72 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 73 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 74 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 75 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 76 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 77 len*sizeof(int));CHKERRQ(ierr); \ 78 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 80 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 81 /* free up old matrix storage */ \ 82 \ 83 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 84 if (!a->singlemalloc) { \ 85 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 86 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 87 } \ 88 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 89 a->singlemalloc = PETSC_TRUE; \ 90 \ 91 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 92 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 93 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 94 a->maxnz += CHUNKSIZE; \ 95 a->reallocs++; \ 96 } \ 97 N = nrow++ - 1; a->nz++; \ 98 /* shift up all the later entries in this row */ \ 99 for (ii=N; ii>=_i; ii--) { \ 100 rp[ii+1] = rp[ii]; \ 101 ap[ii+1] = ap[ii]; \ 102 } \ 103 rp[_i] = col1; \ 104 ap[_i] = value; \ 105 a_noinsert: ; \ 106 ailen[row] = nrow; \ 107 } 108 109 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 110 { \ 111 \ 112 rp = bj + bi[row] + shift; ap = ba + 
bi[row] + shift; \ 113 rmax = bimax[row]; nrow = bilen[row]; \ 114 col1 = col - shift; \ 115 \ 116 low = 0; high = nrow; \ 117 while (high-low > 5) { \ 118 t = (low+high)/2; \ 119 if (rp[t] > col) high = t; \ 120 else low = t; \ 121 } \ 122 for (_i=low; _i<high; _i++) { \ 123 if (rp[_i] > col1) break; \ 124 if (rp[_i] == col1) { \ 125 if (addv == ADD_VALUES) ap[_i] += value; \ 126 else ap[_i] = value; \ 127 goto b_noinsert; \ 128 } \ 129 } \ 130 if (nonew == 1) goto b_noinsert; \ 131 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 132 if (nrow >= rmax) { \ 133 /* there is no extra room in row, therefore enlarge */ \ 134 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 135 PetscScalar *new_a; \ 136 \ 137 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 138 \ 139 /* malloc new storage space */ \ 140 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 141 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 142 new_j = (int*)(new_a + new_nz); \ 143 new_i = new_j + new_nz; \ 144 \ 145 /* copy over old data into new slots */ \ 146 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 147 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 148 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 149 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 150 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 151 len*sizeof(int));CHKERRQ(ierr); \ 152 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 154 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 155 /* free up old matrix storage */ \ 156 \ 157 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 158 if (!b->singlemalloc) { \ 159 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 160 ierr = PetscFree(b->j);CHKERRQ(ierr); 
\ 161 } \ 162 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 163 b->singlemalloc = PETSC_TRUE; \ 164 \ 165 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 166 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 167 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 168 b->maxnz += CHUNKSIZE; \ 169 b->reallocs++; \ 170 } \ 171 N = nrow++ - 1; b->nz++; \ 172 /* shift up all the later entries in this row */ \ 173 for (ii=N; ii>=_i; ii--) { \ 174 rp[ii+1] = rp[ii]; \ 175 ap[ii+1] = ap[ii]; \ 176 } \ 177 rp[_i] = col1; \ 178 ap[_i] = value; \ 179 b_noinsert: ; \ 180 bilen[row] = nrow; \ 181 } 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "MatSetValues_MPIAIJ" 185 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscScalar value; 189 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 190 int cstart = aij->cstart,cend = aij->cend,row,col; 191 PetscTruth roworiented = aij->roworiented; 192 193 /* Some Variables required in the macro */ 194 Mat A = aij->A; 195 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 196 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 197 PetscScalar *aa = a->a; 198 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 199 Mat B = aij->B; 200 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 201 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 202 PetscScalar *ba = b->a; 203 204 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 205 int nonew = a->nonew,shift=0; 206 PetscScalar *ap; 207 208 PetscFunctionBegin; 209 for (i=0; i<m; i++) { 210 if (im[i] < 0) continue; 211 #if defined(PETSC_USE_BOPT_g) 212 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 213 #endif 214 if (im[i] >= rstart && im[i] < rend) { 215 row = im[i] - rstart; 216 for (j=0; j<n; j++) { 217 if (in[j] >= 
cstart && in[j] < cend){ 218 col = in[j] - cstart; 219 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 220 if (ignorezeroentries && value == 0.0) continue; 221 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 222 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 223 } else if (in[j] < 0) continue; 224 #if defined(PETSC_USE_BOPT_g) 225 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 226 #endif 227 else { 228 if (mat->was_assembled) { 229 if (!aij->colmap) { 230 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 231 } 232 #if defined (PETSC_USE_CTABLE) 233 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 234 col--; 235 #else 236 col = aij->colmap[in[j]] - 1; 237 #endif 238 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 239 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 240 col = in[j]; 241 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 242 B = aij->B; 243 b = (Mat_SeqAIJ*)B->data; 244 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 245 ba = b->a; 246 } 247 } else col = in[j]; 248 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 249 if (ignorezeroentries && value == 0.0) continue; 250 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 251 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 252 } 253 } 254 } else { 255 if (!aij->donotstash) { 256 if (roworiented) { 257 if (ignorezeroentries && v[i*n] == 0.0) continue; 258 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 259 } else { 260 if (ignorezeroentries && v[i] == 0.0) continue; 261 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 262 } 263 } 264 } 265 } 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatGetValues_MPIAIJ" 271 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar 
v[]) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 274 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 275 int cstart = aij->cstart,cend = aij->cend,row,col; 276 277 PetscFunctionBegin; 278 for (i=0; i<m; i++) { 279 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 280 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 281 if (idxm[i] >= rstart && idxm[i] < rend) { 282 row = idxm[i] - rstart; 283 for (j=0; j<n; j++) { 284 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 285 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 286 if (idxn[j] >= cstart && idxn[j] < cend){ 287 col = idxn[j] - cstart; 288 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 289 } else { 290 if (!aij->colmap) { 291 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 292 } 293 #if defined (PETSC_USE_CTABLE) 294 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 295 col --; 296 #else 297 col = aij->colmap[idxn[j]] - 1; 298 #endif 299 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 300 else { 301 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 302 } 303 } 304 } 305 } else { 306 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 307 } 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 314 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 315 { 316 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 317 int ierr,nstash,reallocs; 318 InsertMode addv; 319 320 PetscFunctionBegin; 321 if (aij->donotstash) { 322 PetscFunctionReturn(0); 323 } 324 325 /* make sure all processors are either in INSERTMODE or ADDMODE */ 326 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 327 if (addv == (ADD_VALUES|INSERT_VALUES)) { 328 
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 329 } 330 mat->insertmode = addv; /* in case this processor had no cache */ 331 332 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 333 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 334 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 335 PetscFunctionReturn(0); 336 } 337 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 341 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 342 { 343 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 344 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 345 int i,j,rstart,ncols,n,ierr,flg; 346 int *row,*col,other_disassembled; 347 PetscScalar *val; 348 InsertMode addv = mat->insertmode; 349 350 PetscFunctionBegin; 351 if (!aij->donotstash) { 352 while (1) { 353 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 354 if (!flg) break; 355 356 for (i=0; i<n;) { 357 /* Now identify the consecutive vals belonging to the same row */ 358 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 359 if (j < n) ncols = j-i; 360 else ncols = n-i; 361 /* Now assemble all these values with a single function call */ 362 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 363 i = j; 364 } 365 } 366 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 367 } 368 369 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 370 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 371 372 /* determine if any processor has disassembled, if so we must 373 also disassemble ourselfs, in order that we may reassemble. 
*/ 374 /* 375 if nonzero structure of submatrix B cannot change then we know that 376 no processor disassembled thus we can skip this stuff 377 */ 378 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 379 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 380 if (mat->was_assembled && !other_disassembled) { 381 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 382 } 383 } 384 385 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 386 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 389 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 390 391 if (aij->rowvalues) { 392 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 393 aij->rowvalues = 0; 394 } 395 396 /* used by MatAXPY() */ 397 a->xtoy = 0; b->xtoy = 0; 398 a->XtoY = 0; b->XtoY = 0; 399 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 405 int MatZeroEntries_MPIAIJ(Mat A) 406 { 407 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 408 int ierr; 409 410 PetscFunctionBegin; 411 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 412 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatZeroRows_MPIAIJ" 418 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 419 { 420 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 421 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 422 int *nprocs,j,idx,nsends,row; 423 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 424 int *rvalues,tag = A->tag,count,base,slen,n,*source; 425 int *lens,imdex,*lrows,*values,rstart=l->rstart; 426 MPI_Comm comm = A->comm; 427 MPI_Request *send_waits,*recv_waits; 428 MPI_Status recv_status,*send_status; 429 IS istmp; 430 PetscTruth found; 431 432 PetscFunctionBegin; 433 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 434 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 435 436 /* first count number of contributors to each 
processor */ 437 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 438 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 439 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 440 for (i=0; i<N; i++) { 441 idx = rows[i]; 442 found = PETSC_FALSE; 443 for (j=0; j<size; j++) { 444 if (idx >= owners[j] && idx < owners[j+1]) { 445 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 446 } 447 } 448 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 449 } 450 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 451 452 /* inform other processors of number of messages and max length*/ 453 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 454 455 /* post receives: */ 456 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 457 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 458 for (i=0; i<nrecvs; i++) { 459 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 460 } 461 462 /* do sends: 463 1) starts[i] gives the starting index in svalues for stuff going to 464 the ith processor 465 */ 466 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 467 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 468 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 469 starts[0] = 0; 470 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 471 for (i=0; i<N; i++) { 472 svalues[starts[owner[i]]++] = rows[i]; 473 } 474 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 475 476 starts[0] = 0; 477 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 478 count = 0; 479 for (i=0; i<size; i++) { 480 if (nprocs[2*i+1]) { 481 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 482 } 483 } 484 ierr = PetscFree(starts);CHKERRQ(ierr); 485 486 base = owners[rank]; 487 488 /* 
wait on receives */ 489 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 490 source = lens + nrecvs; 491 count = nrecvs; slen = 0; 492 while (count) { 493 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 494 /* unpack receives into our local space */ 495 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 496 source[imdex] = recv_status.MPI_SOURCE; 497 lens[imdex] = n; 498 slen += n; 499 count--; 500 } 501 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 502 503 /* move the data into the send scatter */ 504 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 505 count = 0; 506 for (i=0; i<nrecvs; i++) { 507 values = rvalues + i*nmax; 508 for (j=0; j<lens[i]; j++) { 509 lrows[count++] = values[j] - base; 510 } 511 } 512 ierr = PetscFree(rvalues);CHKERRQ(ierr); 513 ierr = PetscFree(lens);CHKERRQ(ierr); 514 ierr = PetscFree(owner);CHKERRQ(ierr); 515 ierr = PetscFree(nprocs);CHKERRQ(ierr); 516 517 /* actually zap the local rows */ 518 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 519 PetscLogObjectParent(A,istmp); 520 521 /* 522 Zero the required rows. If the "diagonal block" of the matrix 523 is square and the user wishes to set the diagonal we use seperate 524 code so that MatSetValues() is not called for each diagonal allocating 525 new memory, thus calling lots of mallocs and slowing things down. 
526 527 Contributed by: Mathew Knepley 528 */ 529 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 530 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 531 if (diag && (l->A->M == l->A->N)) { 532 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 533 } else if (diag) { 534 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 535 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 536 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 537 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 538 } 539 for (i = 0; i < slen; i++) { 540 row = lrows[i] + rstart; 541 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 542 } 543 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 544 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 } else { 546 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 547 } 548 ierr = ISDestroy(istmp);CHKERRQ(ierr); 549 ierr = PetscFree(lrows);CHKERRQ(ierr); 550 551 /* wait on sends */ 552 if (nsends) { 553 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 554 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 555 ierr = PetscFree(send_status);CHKERRQ(ierr); 556 } 557 ierr = PetscFree(send_waits);CHKERRQ(ierr); 558 ierr = PetscFree(svalues);CHKERRQ(ierr); 559 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatMult_MPIAIJ" 565 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 566 { 567 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 568 int ierr,nt; 569 570 PetscFunctionBegin; 571 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 572 if (nt != A->n) { 573 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 574 } 575 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 576 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 577 ierr = 
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 578 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatMultAdd_MPIAIJ" 584 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 585 { 586 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 587 int ierr; 588 589 PetscFunctionBegin; 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 599 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 int ierr; 603 604 PetscFunctionBegin; 605 /* do nondiagonal part */ 606 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 607 /* send it on its way */ 608 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 609 /* do local part */ 610 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 611 /* receive remote parts: note this assumes the values are not actually */ 612 /* inserted in yy until the next line, which is true for my implementation*/ 613 /* but is not perhaps always true. 
*/ 614 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 EXTERN_C_BEGIN 619 #undef __FUNCT__ 620 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 621 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 622 { 623 MPI_Comm comm; 624 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 625 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 626 IS Me,Notme; 627 int M,N,first,last,*notme,ntids,i, ierr; 628 629 PetscFunctionBegin; 630 631 /* Easy test: symmetric diagonal block */ 632 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 633 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 634 if (!*f) PetscFunctionReturn(0); 635 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 636 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 637 if (ntids==1) PetscFunctionReturn(0); 638 639 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 640 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 641 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 642 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 643 for (i=0; i<first; i++) notme[i] = i; 644 for (i=last; i<M; i++) notme[i-last+first] = i; 645 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 646 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 647 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 648 Aoff = Aoffs[0]; 649 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 650 Boff = Boffs[0]; 651 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 652 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 653 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 654 ierr = ISDestroy(Me);CHKERRQ(ierr); 655 ierr = ISDestroy(Notme);CHKERRQ(ierr); 656 657 PetscFunctionReturn(0); 658 } 659 EXTERN_C_END 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 
663 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 664 { 665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 666 int ierr; 667 668 PetscFunctionBegin; 669 /* do nondiagonal part */ 670 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 671 /* send it on its way */ 672 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 673 /* do local part */ 674 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 675 /* receive remote parts: note this assumes the values are not actually */ 676 /* inserted in yy until the next line, which is true for my implementation*/ 677 /* but is not perhaps always true. */ 678 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 /* 683 This only works correctly for square matrices where the subblock A->A is the 684 diagonal block 685 */ 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 688 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 689 { 690 int ierr; 691 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 692 693 PetscFunctionBegin; 694 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 695 if (a->rstart != a->cstart || a->rend != a->cend) { 696 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 697 } 698 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatScale_MPIAIJ" 704 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 705 { 706 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 707 int ierr; 708 709 PetscFunctionBegin; 710 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 711 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatDestroy_MPIAIJ" 717 int MatDestroy_MPIAIJ(Mat mat) 718 { 719 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 720 int ierr; 721 722 PetscFunctionBegin; 723 #if defined(PETSC_USE_LOG) 724 
PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 725 #endif 726 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 727 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 728 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 729 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 730 #if defined (PETSC_USE_CTABLE) 731 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 732 #else 733 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 734 #endif 735 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 736 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 737 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 738 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 739 ierr = PetscFree(aij);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatView_MPIAIJ_Binary" 745 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 746 { 747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 748 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 749 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 750 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 751 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->M; 761 header[2] = mat->N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->m; 767 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 768 for (i=1; i<size; i++) { 769 
rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 rlen = mat->m; 774 } 775 776 /* load up the local row counts */ 777 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 778 for (i=0; i<mat->m; i++) { 779 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 780 } 781 782 /* store the row lengths to the file */ 783 if (!rank) { 784 MPI_Status status; 785 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 786 for (i=1; i<size; i++) { 787 rlen = range[i+1] - range[i]; 788 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 789 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 793 } 794 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 795 796 /* load up the local column indices */ 797 nzmax = nz; /* )th processor needs space a largest processor needs */ 798 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 799 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 800 cnt = 0; 801 for (i=0; i<mat->m; i++) { 802 for (j=B->i[i]; j<B->i[i+1]; j++) { 803 if ( (col = garray[B->j[j]]) > cstart) break; 804 column_indices[cnt++] = col; 805 } 806 for (k=A->i[i]; k<A->i[i+1]; k++) { 807 column_indices[cnt++] = A->j[k] + cstart; 808 } 809 for (; j<B->i[i+1]; j++) { 810 column_indices[cnt++] = garray[B->j[j]]; 811 } 812 } 813 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 814 815 /* store the column indices to the file */ 816 if (!rank) { 817 MPI_Status status; 818 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 819 for (i=1; i<size; i++) { 820 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 821 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc 
error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values (diagonal-block entries spliced between the
     off-diagonal entries so each row comes out in global column order) */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file: rank 0 writes its own values then
     receives and writes every other rank's values in rank order */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
    Views an MPIAIJ matrix on an ASCII, draw, binary, or socket viewer.
  For ASCII info formats only per-process statistics are printed; otherwise
  the entire matrix is assembled onto process 0 and viewed there.
*/
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  int               ierr,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part: temporarily shift local column indices to global
       numbering, then shift them back after the MatSetValues() calls */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part: map compressed column indices through garray */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr =
MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* Dispatch MatView() for MPIAIJ to the ASCII/draw/binary/socket handler. */
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
    Block Jacobi/SOR relaxation for MPIAIJ: each iteration scatters the
  current solution, subtracts the off-diagonal contribution from the right
  hand side (bb1 = bb - B*x), and applies the requested local sweep on the
  diagonal block.  Only the SOR_LOCAL_* variants are supported in parallel.
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      /* old-style VecScale(alpha,v): negate lvec so multadd subtracts B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): PETSC_NULL is passed for the int lits argument here
         (and in the backward branch) while the symmetric branch passes lits
         twice — looks like a historical inconsistency; confirm intent */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/*
    Gathers matrix statistics: sums the diagonal (A) and off-diagonal (B)
  block statistics locally, then reduces across the communicator for the
  MAT_GLOBAL_MAX / MAT_GLOBAL_SUM flavors.
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr =
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  /* NOTE(review): columns_local is set to the GLOBAL column count N, matching
     rows_local's use of the local m only for rows — confirm this is intended */
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/* Applies a MatOption to both the diagonal (A) and off-diagonal (B) blocks,
   updating the parallel bookkeeping flags where required. */
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    /* SETERRQ returns, so no fallthrough into the next case occurs */
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    /* symmetry options apply to the diagonal block only */
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
    Returns one locally owned row with global column indices, merging the
  diagonal-block and off-diagonal-block entries into a single sorted list
  held in the matrix's reusable rowvalues/rowindices work arrays.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  /* NOTE(review): no ';' after SETERRQ — compiles because the macro body is a
     block containing a return, but worth normalizing */
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes cstart come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* Marks the per-matrix MatGetRow() work space as free again. */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
    Computes the Frobenius, 1- (max column sum) or infinity- (max row sum)
  norm by combining the two sequential blocks and reducing over the
  communicator.  The 2-norm is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      /* accumulate per-global-column absolute sums, then allreduce */
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
    Builds the transpose by inserting each local row as a column of a new
  matrix B (MatSetValues with the row/column arguments swapped).  In-place
  transposition (matout == PETSC_NULL) is allowed only for square matrices.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* note swapped local/global sizes: B is n x m (N x M globally) */
  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part: temporarily globalize column indices, restore after */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part: map compressed indices to global via garray */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
    Computes diag(ll)*A*diag(rr).  The right-scaling of the off-diagonal
  block needs the ghosted entries of rr, so that scatter is started early
  and completed after the local scaling work.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
/* Prints the SeqAIJ option help once, from rank 0 only. */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
/* AIJ matrices always have block size 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* Clears the factored state of the diagonal block. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/* Compares the local A and B blocks, then lands the per-process results
   with a logical-AND reduction so all ranks agree on the answer. */
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/* Copies A into B blockwise when the nonzero patterns match, otherwise
   falls back to the generic value-by-value copy. */
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/* Default preallocation used when the caller never called
   MatMPIAIJSetPreallocation() explicitly. */
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
    Y = a*X + Y.  With identical nonzero patterns the BLAS axpy is applied
  directly to both blocks; with a subset pattern an X-to-Y index map is
  built (and cached on Y) for the off-diagonal block; otherwise the
  generic fallback is used.
*/
int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  int        ierr,one=1,i;
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x =
(Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* drop a cached map built against a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Positional virtual-function table for the MPIAIJ type; slot order must
   match the _MatOps struct exactly. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Saves the numerical values of both blocks for later retrieval. */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the numerical values previously saved with MatStoreValues(). */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
    Validates the per-row nonzero estimates and forwards them to the two
  sequential blocks (d_* for the diagonal block, o_* for the off-diagonal).
*/
int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  Mat_MPIAIJ *b;
  int        ierr,i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
    Constructor for the MPIAIJ type: splits ownership, builds the row and
  column ownership tables, creates the stash for off-process entries, and
  creates the two sequential blocks A (diagonal) and B (off-diagonal,
  stored with the full global column width until assembly compresses it).
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather each rank's local size into rowners[1..size], then prefix-sum
     (rowners[0] is 0, so starting the loop at i=2 is correct) */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  /* register the type-specific entry points queried by the generic layer */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
  MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_AIJ"
/* "aij" resolves to MATSEQAIJ on one process and MATMPIAIJ otherwise. */
int MatCreate_AIJ(Mat A) {
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
    Duplicates an assembled MPIAIJ matrix, copying ownership tables, the
  colmap/garray lookup structures, the scatter context, and (per cpvalues)
  the numerical values of both sequential blocks.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;
1897 1898 a->rstart = oldmat->rstart; 1899 a->rend = oldmat->rend; 1900 a->cstart = oldmat->cstart; 1901 a->cend = oldmat->cend; 1902 a->size = oldmat->size; 1903 a->rank = oldmat->rank; 1904 a->donotstash = oldmat->donotstash; 1905 a->roworiented = oldmat->roworiented; 1906 a->rowindices = 0; 1907 a->rowvalues = 0; 1908 a->getrowactive = PETSC_FALSE; 1909 1910 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1911 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1912 if (oldmat->colmap) { 1913 #if defined (PETSC_USE_CTABLE) 1914 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1915 #else 1916 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1917 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1918 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1919 #endif 1920 } else a->colmap = 0; 1921 if (oldmat->garray) { 1922 int len; 1923 len = oldmat->B->n; 1924 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1925 PetscLogObjectMemory(mat,len*sizeof(int)); 1926 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1927 } else a->garray = 0; 1928 1929 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1930 PetscLogObjectParent(mat,a->lvec); 1931 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1932 PetscLogObjectParent(mat,a->Mvctx); 1933 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1934 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1935 PetscLogObjectParent(mat,a->A); 1936 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1937 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1938 PetscLogObjectParent(mat,a->B); 1939 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1940 *newmat = mat; 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #include "petscsys.h" 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatLoad_MPIAIJ" 1948 int 
MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  /*
     Loads a matrix from a PETSc binary viewer into a new parallel AIJ matrix.
     Collective: rank 0 reads the header, the row lengths, the column indices
     and the numerical values from the file and ships each remote process its
     slice with MPI_Send; the other ranks post matching MPI_Recv's.  Rows are
     distributed contiguously, M/size rounded up on the low ranks.
  */
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 touches the file; header = [cookie, M, N, nz-or-format] */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* prefix-sum into ownership offsets; rowners[1] already holds rank 0's m */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the allocation */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it; reused for every remote rank */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: ourlens is temporarily reduced to the diagonal-only
     counts for preallocation, then restored below */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix, one owned global row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out; note the values use
       A->tag while the column indices above used the viewer's tag */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat          *local,M,Mreuse;
  PetscScalar  *vwork,*aa;
  MPI_Comm     comm = mat->comm;
  Mat_SeqAIJ   *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential submatrix from the previous call was stashed on the
       result with PetscObjectCompose() below; recover and refill it */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* rend/rstart here are the new matrix's COLUMN ownership bounds */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths; olens shares the dlens allocation */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* copy every local row of the sequential submatrix into the parallel one */
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.
  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  /* dispatch to the implementation composed on B by MatCreate_MPIAIJ();
     silently a no-op when the matrix type did not compose one */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
   By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor than it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  /* convenience wrapper: create + set type + preallocate; note a SEQAIJ
     matrix (not MPIAIJ) is produced on a one-process communicator */
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
   Returns (without copying) the diagonal SeqAIJ block, the off-diagonal
   SeqAIJ block, and the local-to-global column map (garray) of an MPIAIJ
   matrix.  The caller does not own the returned objects.
*/
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   Propagates an ISColoring onto the two local SeqAIJ pieces of an MPIAIJ
   matrix.  IS_COLORING_LOCAL gathers all colors onto every process and maps
   the off-diagonal columns through garray; IS_COLORING_GHOSTED indexes the
   (already ghosted) color array via A->mapping.
*/
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the global color array,
       then select the entries for this process's ghost columns */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr =
ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map each owned global column index
       through A->mapping to index into the ghosted color array */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: same, using garray's global
       column indices for the ghost columns */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forwards ADIC-differentiated values to both local SeqAIJ pieces. */
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Forwards ADIFOR-differentiated values (nl derivatives) to both local SeqAIJ pieces. */
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrices
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH of the separate files
          MUST be the same.
2702 2703 @*/ 2704 int MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat) 2705 { 2706 int ierr,m,n,i,rstart,nnz,I,*dnz,*onz; 2707 const int *indx; 2708 const PetscScalar *values; 2709 PetscMap columnmap,rowmap; 2710 2711 PetscFunctionBegin; 2712 2713 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2714 if (scall == MAT_INITIAL_MATRIX){ 2715 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2716 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2717 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2718 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2719 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2720 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2721 2722 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2723 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2724 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2725 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2726 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2727 2728 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2729 for (i=0;i<m;i++) { 2730 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2731 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2732 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2733 } 2734 /* This routine will ONLY return MPIAIJ type matrix */ 2735 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2736 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2737 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2738 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2739 2740 } else if (scall == MAT_REUSE_MATRIX){ 2741 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2742 } else { 2743 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2744 } 2745 2746 for (i=0;i<m;i++) { 2747 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2748 I = i + rstart; 
2749 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2750 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2751 } 2752 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2753 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2754 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 #ifdef OLD 2756 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2757 2758 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2759 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2760 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2761 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2762 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2763 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2764 2765 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2766 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2767 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2768 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2769 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2770 2771 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2772 for (i=0;i<m;i++) { 2773 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2774 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2775 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2776 } 2777 /* This routine will ONLY return MPIAIJ type matrix */ 2778 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2779 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2780 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2781 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2782 2783 for (i=0;i<m;i++) { 2784 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2785 I = i + rstart; 2786 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2787 ierr = 
MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2788 } 2789 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2790 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2791 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2792 #endif 2793 PetscFunctionReturn(0); 2794 } 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatFileSplit" 2798 int MatFileSplit(Mat A,char *outfile) 2799 { 2800 int ierr,rank,len,m,N,i,rstart,nnz; 2801 const int *indx; 2802 PetscViewer out; 2803 char *name; 2804 Mat B; 2805 const PetscScalar *values; 2806 2807 PetscFunctionBegin; 2808 2809 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2810 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2811 /* Should this be the type of the diagonal block of A? */ 2812 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2813 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2814 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2815 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2816 for (i=0;i<m;i++) { 2817 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2818 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2819 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2820 } 2821 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2822 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2823 2824 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2825 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2826 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2827 sprintf(name,"%s.%d",outfile,rank); 2828 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2829 ierr = PetscFree(name); 2830 ierr = MatView(B,out);CHKERRQ(ierr); 2831 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2832 ierr = MatDestroy(B);CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835