#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
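/*
  Illustration of the mapping above (a sketch, not part of the build): if
  this process references off-diagonal global columns garray = {2,7,11},
  then colmap records 2->1, 7->2, 11->3, i.e. the local index shifted by
  one so that 0 can mean "not present". A lookup of global column 7 thus
  yields local column 1 after the "col--" adjustment used by the callers
  below; a lookup that returns 0 becomes col = -1, flagging a column this
  process has never seen.
*/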
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = 0;
  PetscScalar *ap;
  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
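/*
  Usage sketch (hypothetical caller, shown only for illustration): any
  process may set any global entry; entries whose rows live on another
  process are cached in mat->stash here and shipped to their owners during
  assembly.

      int         grow = 0, gcol = 5;   // global indices, possibly off-process
      PetscScalar val  = 1.0;
      ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/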
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}
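/*
  A worked instance of the reduction above (illustrative only): with four
  processes whose mat->was_assembled flags are {1,1,0,1}, MPI_PROD yields
  other_disassembled = 0 on every process, so each previously assembled
  process calls DisAssemble_MPIAIJ() and all processes reassemble B with a
  consistent (uncompressed) nonzero structure.
*/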
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *nprocs,j,idx,nsends,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart = l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory repeatedly and slow things down
     with many mallocs.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
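/*
  Usage sketch (hypothetical caller, illustration only): zero global rows
  3 and 7 and place 1.0 on the diagonal of each zeroed row. Any process may
  list rows owned by another process, since the routine above routes each
  row index to its owner before zeroing.

      int         rows[2] = {3,7};
      PetscScalar one     = 1.0;
      IS          is;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(mat,is,&one);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/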
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
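/*
  Note on the structure above (added commentary): with the local rows
  partitioned as [A B], where A holds the diagonal block (columns owned by
  this process) and B the off-diagonal block (compressed ghost columns),
  the product is y = A*x_local + B*x_ghost. The scatter that fills lvec
  with ghost values is started before the local multiply and completed
  after it, overlapping communication with computation. MatMultAdd and the
  transpose routines below follow the same pattern, with the transpose
  versions scattering in SCATTER_REVERSE with ADD_VALUES instead.
*/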
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  MPI_Comm   comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat        Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS         Me,Notme;
  int        M,N,first,last,*notme,ntids,i,ierr;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
  if (ntids==1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock
  A->A is the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ  *B = (Mat_SeqAIJ*)aij->B->data;
  int         nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int         nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
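/*
  Layout of the file produced above (summarizing the writes, for reference):

      int    header[4]   {MAT_FILE_COOKIE, M, N, total nonzeros}
      int    rowlen[M]   global row lengths, in row order
      int    colidx[nz]  global column indices, row by row
      scalar vals[nz]    numerical values, same ordering

  Within each row the indices are emitted in ascending global column order
  by interleaving the leading B segment (off-diagonal columns before the
  diagonal block), the A segment (local columns shifted by cstart), and the
  trailing B segment (off-diagonal columns beyond the diagonal block).
*/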
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  int               ierr,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to participate in viewing the matrix since the graphics
       waits are synchronized across all processors that share the PetscDraw
       object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
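/*
  The iteration implemented above (added commentary): each outer iteration
  refreshes the ghost values of x, forms the locally held right-hand side

      bb1 = bb - B * x_ghost

  (B is the off-diagonal block, so this folds the off-process coupling into
  the right-hand side), and then runs lits sweeps of sequential SOR on the
  diagonal block A with bb1. The overall method is thus block Jacobi
  between processes with SOR within each process.
*/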
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
If need be, we can provide a more 1513 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1514 then copying the submatrices */ 1515 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1516 } else { 1517 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1518 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1519 } 1520 PetscFunctionReturn(0); 1521 } 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1525 int MatSetUpPreallocation_MPIAIJ(Mat A) 1526 { 1527 int ierr; 1528 1529 PetscFunctionBegin; 1530 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #include "petscblaslapack.h" 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatAXPY_MPIAIJ" 1537 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1538 { 1539 int ierr,one=1,i; 1540 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1541 Mat_SeqAIJ *x,*y; 1542 1543 PetscFunctionBegin; 1544 if (str == SAME_NONZERO_PATTERN) { 1545 x = (Mat_SeqAIJ *)xx->A->data; 1546 y = (Mat_SeqAIJ *)yy->A->data; 1547 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1548 x = (Mat_SeqAIJ *)xx->B->data; 1549 y = (Mat_SeqAIJ *)yy->B->data; 1550 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1551 } else if (str == SUBSET_NONZERO_PATTERN) { 1552 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1553 1554 x = (Mat_SeqAIJ *)xx->B->data; 1555 y = (Mat_SeqAIJ *)yy->B->data; 1556 if (y->xtoy && y->XtoY != xx->B) { 1557 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1558 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1559 } 1560 if (!y->xtoy) { /* get xtoy */ 1561 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1562 y->XtoY = xx->B; 1563 } 1564 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1565 } else { 1566 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 /* -------------------------------------------------------------------*/ 1572 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1573 MatGetRow_MPIAIJ, 1574 MatRestoreRow_MPIAIJ, 1575 MatMult_MPIAIJ, 1576 /* 4*/ MatMultAdd_MPIAIJ, 1577 MatMultTranspose_MPIAIJ, 1578 MatMultTransposeAdd_MPIAIJ, 1579 0, 1580 0, 1581 0, 1582 /*10*/ 0, 1583 0, 1584 0, 1585 MatRelax_MPIAIJ, 1586 MatTranspose_MPIAIJ, 1587 /*15*/ MatGetInfo_MPIAIJ, 1588 MatEqual_MPIAIJ, 1589 MatGetDiagonal_MPIAIJ, 1590 MatDiagonalScale_MPIAIJ, 1591 MatNorm_MPIAIJ, 1592 /*20*/ MatAssemblyBegin_MPIAIJ, 1593 MatAssemblyEnd_MPIAIJ, 1594 0, 1595 MatSetOption_MPIAIJ, 1596 MatZeroEntries_MPIAIJ, 1597 /*25*/ MatZeroRows_MPIAIJ, 1598 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1599 MatLUFactorSymbolic_MPIAIJ_TFS, 1600 #else 1601 0, 1602 #endif 1603 0, 1604 0, 1605 0, 1606 /*30*/ MatSetUpPreallocation_MPIAIJ, 1607 0, 1608 0, 1609 0, 1610 0, 1611 /*35*/ MatDuplicate_MPIAIJ, 1612 0, 1613 0, 1614 0, 1615 0, 1616 /*40*/ MatAXPY_MPIAIJ, 1617 MatGetSubMatrices_MPIAIJ, 1618 MatIncreaseOverlap_MPIAIJ, 1619 MatGetValues_MPIAIJ, 1620 MatCopy_MPIAIJ, 1621 /*45*/ MatPrintHelp_MPIAIJ, 1622 MatScale_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 /*50*/ MatGetBlockSize_MPIAIJ, 1627 0, 1628 0, 1629 0, 1630 0, 1631 /*55*/ MatFDColoringCreate_MPIAIJ, 1632 0, 1633 MatSetUnfactored_MPIAIJ, 1634 0, 1635 0, 1636 /*60*/ MatGetSubMatrix_MPIAIJ, 1637 MatDestroy_MPIAIJ, 1638 MatView_MPIAIJ, 1639 MatGetPetscMaps_Petsc, 1640 0, 1641 /*65*/ 0, 1642 0, 1643 0, 1644 0, 1645 0, 1646 /*70*/ 0, 1647 0, 1648 
MatSetColoring_MPIAIJ, 1649 MatSetValuesAdic_MPIAIJ, 1650 MatSetValuesAdifor_MPIAIJ, 1651 /*75*/ 0, 1652 0, 1653 0, 1654 0, 1655 0, 1656 /*80*/ 0, 1657 0, 1658 0, 1659 0, 1660 /*85*/ MatLoad_MPIAIJ}; 1661 1662 /* ----------------------------------------------------------------------------------------*/ 1663 1664 EXTERN_C_BEGIN 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1667 int MatStoreValues_MPIAIJ(Mat mat) 1668 { 1669 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1670 int ierr; 1671 1672 PetscFunctionBegin; 1673 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1674 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 EXTERN_C_END 1678 1679 EXTERN_C_BEGIN 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1682 int MatRetrieveValues_MPIAIJ(Mat mat) 1683 { 1684 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1685 int ierr; 1686 1687 PetscFunctionBegin; 1688 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1689 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1690 PetscFunctionReturn(0); 1691 } 1692 EXTERN_C_END 1693 1694 #include "petscpc.h" 1695 EXTERN_C_BEGIN 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1698 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1699 { 1700 Mat_MPIAIJ *b; 1701 int ierr,i; 1702 1703 PetscFunctionBegin; 1704 B->preallocated = PETSC_TRUE; 1705 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1706 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1707 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1708 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1709 if (d_nnz) { 1710 for (i=0; i<B->m; i++) { 1711 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1712 } 1713 } 1714 if (o_nnz) { 1715 for (i=0; i<B->m; i++) { 1716 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1717 } 1718 } 1719 b = (Mat_MPIAIJ*)B->data; 1720 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1721 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1722 1723 PetscFunctionReturn(0); 1724 } 1725 EXTERN_C_END 1726 1727 /*MC 1728 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1729 1730 Options Database Keys: 1731 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1732 1733 Level: beginner 1734 1735 .seealso: MatCreateMPIAIJ 1736 M*/ 1737 1738 EXTERN_C_BEGIN 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatCreate_MPIAIJ" 1741 int MatCreate_MPIAIJ(Mat B) 1742 { 1743 Mat_MPIAIJ *b; 1744 int ierr,i,size; 1745 1746 PetscFunctionBegin; 1747 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1748 1749 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1750 B->data = (void*)b; 1751 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1752 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1753 B->factor = 0; 1754 B->assembled = PETSC_FALSE; 1755 B->mapping = 0; 1756 1757 B->insertmode = NOT_SET_VALUES; 1758 b->size = size; 1759 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1760 1761 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1762 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1763 1764 /* the information in the maps duplicates the information computed below, eventually 1765 we should remove the duplicate information that is not contained in the maps */ 1766 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1767 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1768 1769 /* build local table of row and column ownerships */ 1770 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1771 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1772 b->cowners = b->rowners + b->size + 2; 1773 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1774 b->rowners[0] = 0; 1775 for (i=2; i<=b->size; i++) { 1776 b->rowners[i] += b->rowners[i-1]; 1777 } 1778 b->rstart = b->rowners[b->rank]; 1779 b->rend = b->rowners[b->rank+1]; 1780 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1781 b->cowners[0] = 0; 1782 for (i=2; i<=b->size; i++) { 1783 b->cowners[i] += b->cowners[i-1]; 1784 } 1785 b->cstart = b->cowners[b->rank]; 1786 b->cend = b->cowners[b->rank+1]; 1787 1788 /* build cache for off array entries formed */ 1789 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1790 b->donotstash = PETSC_FALSE; 1791 b->colmap = 0; 1792 b->garray = 0; 1793 b->roworiented = PETSC_TRUE; 1794 1795 /* stuff used for matrix vector multiply */ 1796 b->lvec = PETSC_NULL; 1797 b->Mvctx = PETSC_NULL; 1798 1799 /* stuff for MatGetRow() */ 1800 b->rowindices = 0; 1801 b->rowvalues = 0; 1802 b->getrowactive = PETSC_FALSE; 1803 1804 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 1805 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1806 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1807 PetscLogObjectParent(B,b->A); 1808 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1809 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1810 PetscLogObjectParent(B,b->B); 1811 1812 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1813 "MatStoreValues_MPIAIJ", 1814 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1815 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1816 "MatRetrieveValues_MPIAIJ", 1817 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1818 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1819 "MatGetDiagonalBlock_MPIAIJ", 1820 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1821 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1822 "MatIsTranspose_MPIAIJ", 1823 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1824 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1825 "MatMPIAIJSetPreallocation_MPIAIJ", 1826 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1827 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1828 "MatDiagonalScaleLocal_MPIAIJ", 1829 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1830 PetscFunctionReturn(0); 1831 } 1832 EXTERN_C_END 1833 1834 /*MC 1835 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1836 1837 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1838 and MATMPIAIJ otherwise. As a result, for single process communicators, 1839 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1840 for communicators controlling multiple processes. It is recommended that you call both of 1841 the above preallocation routines for simplicity. 1842 1843 Options Database Keys: 1844 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1845 1846 Level: beginner 1847 1848 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1849 M*/ 1850 1851 EXTERN_C_BEGIN 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatCreate_AIJ" 1854 int MatCreate_AIJ(Mat A) { 1855 int ierr,size; 1856 1857 PetscFunctionBegin; 1858 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1859 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1860 if (size == 1) { 1861 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1862 } else { 1863 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1864 } 1865 PetscFunctionReturn(0); 1866 } 1867 EXTERN_C_END 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1871 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1872 { 1873 Mat mat; 1874 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1875 int ierr; 1876 1877 PetscFunctionBegin; 1878 *newmat = 0; 1879 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1880 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1881 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1882 a = (Mat_MPIAIJ*)mat->data; 1883 1884 mat->factor = matin->factor; 1885 mat->assembled = PETSC_TRUE; 1886 mat->insertmode = NOT_SET_VALUES; 1887 mat->preallocated = PETSC_TRUE; 1888 1889 a->rstart = oldmat->rstart; 1890 a->rend = oldmat->rend; 1891 a->cstart = oldmat->cstart; 1892 a->cend = oldmat->cend; 1893 a->size = oldmat->size; 1894 a->rank = oldmat->rank; 1895 a->donotstash = oldmat->donotstash; 1896 a->roworiented = oldmat->roworiented; 1897 a->rowindices = 0; 1898 a->rowvalues = 0; 1899 a->getrowactive = PETSC_FALSE; 1900 1901 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1902 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1903 if (oldmat->colmap) { 1904 #if defined (PETSC_USE_CTABLE) 1905 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1906 #else 1907 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1908 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1909 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1910 #endif 1911 } else a->colmap = 0; 1912 if (oldmat->garray) { 1913 int len; 1914 len = oldmat->B->n; 1915 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1916 PetscLogObjectMemory(mat,len*sizeof(int)); 1917 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1918 } else a->garray = 0; 1919 1920 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1921 PetscLogObjectParent(mat,a->lvec); 1922 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1923 PetscLogObjectParent(mat,a->Mvctx); 1924 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1925 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1926 PetscLogObjectParent(mat,a->A); 1927 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1928 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1929 PetscLogObjectParent(mat,a->B); 1930 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1931 *newmat = mat; 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #include "petscsys.h" 1936 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatLoad_MPIAIJ" 1939 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1940 { 1941 Mat A; 1942 PetscScalar *vals,*svals; 1943 MPI_Comm comm = ((PetscObject)viewer)->comm; 1944 MPI_Status status; 
1945 int i,nz,ierr,j,rstart,rend,fd; 1946 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1947 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1948 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1949 1950 PetscFunctionBegin; 1951 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1952 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1953 if (!rank) { 1954 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1955 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1956 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 1957 if (header[3] < 0) { 1958 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1959 } 1960 } 1961 1962 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1963 M = header[1]; N = header[2]; 1964 /* determine ownership of all rows */ 1965 m = M/size + ((M % size) > rank); 1966 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1967 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1968 rowners[0] = 0; 1969 for (i=2; i<=size; i++) { 1970 rowners[i] += rowners[i-1]; 1971 } 1972 rstart = rowners[rank]; 1973 rend = rowners[rank+1]; 1974 1975 /* distribute row lengths to all processors */ 1976 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1977 offlens = ourlens + (rend-rstart); 1978 if (!rank) { 1979 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1980 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1981 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1982 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1983 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1984 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1985 } else { 1986 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1987 } 1988 1989 if (!rank) { 1990 /* calculate the number of nonzeros on each processor */ 1991 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1992 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1993 for (i=0; i<size; i++) { 1994 for (j=rowners[i]; j< rowners[i+1]; j++) { 1995 procsnz[i] += rowlengths[j]; 1996 } 1997 } 1998 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1999 2000 /* determine max buffer needed and allocate it */ 2001 maxnz = 0; 2002 for (i=0; i<size; i++) { 2003 maxnz = PetscMax(maxnz,procsnz[i]); 2004 } 2005 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2006 2007 /* read in my part of the matrix column indices */ 2008 nz = procsnz[0]; 2009 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2010 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2011 2012 /* read in everyone else's part and ship it off */ 2013 for (i=1; i<size; i++) { 2014 nz = procsnz[i]; 2015 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2016 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2017 } 2018 ierr = PetscFree(cols);CHKERRQ(ierr); 2019 } else { 2020 /* determine buffer space needed for message */ 2021 nz = 0; 2022 for (i=0; i<m; i++) { 2023 nz += ourlens[i]; 2024 } 2025 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2026 2027 /* receive message of column indices */ 2028 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2029 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2030 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2031 } 2032 2033 /* determine column ownership if matrix is not square */ 2034 if (N != M) { 2035 n = N/size + ((N % size) > rank); 2036 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2037 cstart = cend - n; 2038 } else { 2039 cstart = rstart; 2040 cend = rend; 2041 n = cend - cstart; 2042 } 2043 2044 /* loop over local rows, determining number of off diagonal entries */ 2045 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2046 jj = 0; 2047 for (i=0; i<m; i++) { 2048 for (j=0; j<ourlens[i]; j++) { 2049 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2050 jj++; 2051 } 2052 } 2053 2054 /* create our matrix */ 2055 for (i=0; i<m; i++) { 2056 ourlens[i] -= offlens[i]; 2057 } 2058 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2059 ierr = MatSetType(A,type);CHKERRQ(ierr); 2060 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2061 2062 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2063 for (i=0; i<m; i++) { 2064 ourlens[i] += offlens[i]; 2065 } 2066 2067 if (!rank) { 2068 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2069 2070 /* read in my part of the matrix numerical values */ 2071 nz = procsnz[0]; 2072 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2073 2074 /* insert into matrix */ 2075 jj = rstart; 2076 smycols = mycols; 2077 svals = vals; 2078 for (i=0; i<m; i++) { 2079 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2080 smycols += ourlens[i]; 2081 svals += ourlens[i]; 2082 jj++; 2083 } 2084 2085 /* read in the other processors' parts and ship them out */ 2086 for (i=1; i<size; i++) { 2087 nz = procsnz[i]; 2088 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2089 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2090 } 2091 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2092 } else { 2093 /* receive numeric values */ 2094 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2095 2096 /* receive message of values */ 2097 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2098 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2099 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2100 2101 /* insert into matrix */ 2102 jj = rstart; 2103 smycols = mycols; 2104 svals = vals; 2105 for (i=0; i<m; i++) { 2106 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2107 smycols += ourlens[i]; 2108 svals += ourlens[i]; 2109 jj++; 2110 } 2111 } 2112 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2113 ierr = PetscFree(vals);CHKERRQ(ierr); 2114 ierr = PetscFree(mycols);CHKERRQ(ierr); 2115 ierr = PetscFree(rowners);CHKERRQ(ierr); 2116 2117 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2118 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2119 *newmat = A; 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2125 /* 2126 Not great since it makes two copies of the submatrix: first a SeqAIJ 2127 version of the local rows, and then the end result obtained by concatenating the local matrices.
2128 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2129 */ 2130 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2131 { 2132 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2133 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2134 Mat *local,M,Mreuse; 2135 PetscScalar *vwork,*aa; 2136 MPI_Comm comm = mat->comm; 2137 Mat_SeqAIJ *aij; 2138 2139 2140 PetscFunctionBegin; 2141 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2143 2144 if (call == MAT_REUSE_MATRIX) { 2145 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2146 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2147 local = &Mreuse; 2148 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2149 } else { 2150 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2151 Mreuse = *local; 2152 ierr = PetscFree(local);CHKERRQ(ierr); 2153 } 2154 2155 /* 2156 m - number of local rows 2157 n - number of columns (same on all processors) 2158 rstart - first row in new global matrix generated 2159 */ 2160 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2161 if (call == MAT_INITIAL_MATRIX) { 2162 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2163 ii = aij->i; 2164 jj = aij->j; 2165 2166 /* 2167 Determine the number of non-zeros in the diagonal and off-diagonal 2168 portions of the matrix in order to do correct preallocation 2169 */ 2170 2171 /* first get start and end of "diagonal" columns */ 2172 if (csize == PETSC_DECIDE) { 2173 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2174 if (mglobal == n) { /* square matrix */ 2175 nlocal = m; 2176 } else { 2177 nlocal = n/size + ((n % size) > rank); 2178 } 2179 } else { 2180 nlocal = csize; 2181 } 2182 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2183 rstart = rend - nlocal; 2184 if (rank == size - 1 && rend != n) { 2185 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2186 } 2187 2188 /* next, compute all the lengths */ 2189 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2190 olens = dlens + m; 2191 for (i=0; i<m; i++) { 2192 jend = ii[i+1] - ii[i]; 2193 olen = 0; 2194 dlen = 0; 2195 for (j=0; j<jend; j++) { 2196 if (*jj < rstart || *jj >= rend) olen++; 2197 else dlen++; 2198 jj++; 2199 } 2200 olens[i] = olen; 2201 dlens[i] = dlen; 2202 } 2203 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2204 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2205 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2206 ierr = PetscFree(dlens);CHKERRQ(ierr); 2207 } else { 2208 int ml,nl; 2209 2210 M = *newmat; 2211 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2212 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2213 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2214 /* 2215 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2216 rather than the slower MatSetValues(). 
2217 */ 2218 M->was_assembled = PETSC_TRUE; 2219 M->assembled = PETSC_FALSE; 2220 } 2221 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2222 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2223 ii = aij->i; 2224 jj = aij->j; 2225 aa = aij->a; 2226 for (i=0; i<m; i++) { 2227 row = rstart + i; 2228 nz = ii[i+1] - ii[i]; 2229 cwork = jj; jj += nz; 2230 vwork = aa; aa += nz; 2231 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2232 } 2233 2234 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2235 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2236 *newmat = M; 2237 2238 /* save submatrix used in processor for next request */ 2239 if (call == MAT_INITIAL_MATRIX) { 2240 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2241 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2242 } 2243 2244 PetscFunctionReturn(0); 2245 } 2246 2247 #undef __FUNCT__ 2248 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2249 /*@C 2250 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format 2251 (the default parallel PETSc format). For good matrix assembly performance 2252 the user should preallocate the matrix storage by setting the parameters 2253 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2254 performance can be increased by more than a factor of 50. 2255 2256 Collective on MPI_Comm 2257 2258 Input Parameters: 2259 + B - the matrix 2260 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2261 (same value is used for all local rows) 2262 . d_nnz - array containing the number of nonzeros in the various rows of the 2263 DIAGONAL portion of the local submatrix (possibly different for each row) 2264 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2265 The size of this array is equal to the number of local rows, i.e. 'm'. 2266 You must leave room for the diagonal entry even if it is zero. 2267 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2268 submatrix (same value is used for all local rows). 2269 - o_nnz - array containing the number of nonzeros in the various rows of the 2270 OFF-DIAGONAL portion of the local submatrix (possibly different for 2271 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2272 structure. The size of this array is equal to the number 2273 of local rows, i.e. 'm'. 2274 2275 The AIJ format (also called the Yale sparse matrix format or 2276 compressed row storage) is fully compatible with standard Fortran 77 2277 storage. That is, the stored row and column indices can begin at 2278 either one (as in Fortran) or zero. See the users manual for details. 2279 2280 The user MUST specify either the local or global matrix dimensions 2281 (possibly both). 2282 2283 The parallel matrix is partitioned such that the first m0 rows belong to 2284 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2285 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2286 2287 The DIAGONAL portion of the local submatrix of a processor can be defined 2288 as the submatrix which is obtained by extracting the part corresponding 2289 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2290 first row that belongs to the processor, and r2 is the last row belonging 2291 to this processor. This is a square mxm matrix. The remaining portion 2292 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2293 2294 If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored. 2295 2296 By default, this format uses inodes (identical nodes) when possible. 2297 We search for consecutive rows with the same nonzero structure, thereby 2298 reusing matrix information to achieve increased efficiency. 2299 2300 Options Database Keys: 2301 + -mat_aij_no_inode - Do not use inodes 2302 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2303 - -mat_aij_oneindex - Internally use indexing starting at 1 2304 rather than 0. Note that when calling MatSetValues(), 2305 the user still MUST index entries starting at 0! 2306 2307 Example usage: 2308 2309 Consider the following 8x8 matrix with 34 non-zero values, that is 2310 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2311 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2312 as follows: 2313 2314 .vb 2315 1 2 0 | 0 3 0 | 0 4 2316 Proc0 0 5 6 | 7 0 0 | 8 0 2317 9 0 10 | 11 0 0 | 12 0 2318 ------------------------------------- 2319 13 0 14 | 15 16 17 | 0 0 2320 Proc1 0 18 0 | 19 20 21 | 0 0 2321 0 0 0 | 22 23 0 | 24 0 2322 ------------------------------------- 2323 Proc2 25 26 27 | 0 0 28 | 29 0 2324 30 0 0 | 31 32 33 | 0 34 2325 .ve 2326 2327 This can be represented as a collection of submatrices as: 2328 2329 .vb 2330 A B C 2331 D E F 2332 G H I 2333 .ve 2334 2335 Where the submatrices A,B,C are owned by proc0, D,E,F are 2336 owned by proc1, G,H,I are owned by proc2. 2337 2338 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2339 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2340 The 'M','N' parameters are 8,8, and have the same values on all procs. 2341 2342 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2343 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2344 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2345 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2346 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2347 matrix, and [DF] as another SeqAIJ matrix. 2348 2349 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2350 allocated for every row of the local diagonal submatrix, and o_nz 2351 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2352 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 2353 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2354 In this case, the values of d_nz,o_nz are: 2355 .vb 2356 proc0 : d_nz = 2, o_nz = 2 2357 proc1 : d_nz = 3, o_nz = 2 2358 proc2 : d_nz = 1, o_nz = 4 2359 .ve 2360 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2361 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2362 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2363 34 values. 2364 2365 When the d_nnz, o_nnz parameters are specified, the storage is specified 2366 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2367 In the above case the values for d_nnz,o_nnz are: 2368 .vb 2369 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2370 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2371 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2372 .ve 2373 Here the space allocated is the sum of all the above values, i.e. 34, and 2374 hence preallocation is perfect.
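   As a concrete sketch (not a complete program; the matrix name A and the
   use of PETSC_COMM_WORLD are illustrative assumptions), the calls made by
   proc1 in the example above would look like:
.vb
   int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
   ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Each process passes only the d_nnz/o_nnz values for the rows it owns, so
   proc0 and proc2 would pass the arrays worked out for them above.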
2375 2376 Level: intermediate 2377 2378 .keywords: matrix, aij, compressed row, sparse, parallel 2379 2380 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2381 @*/ 2382 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2383 { 2384 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2385 2386 PetscFunctionBegin; 2387 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2388 if (f) { 2389 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatCreateMPIAIJ" 2396 /*@C 2397 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2398 (the default parallel PETSc format). For good matrix assembly performance 2399 the user should preallocate the matrix storage by setting the parameters 2400 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2401 performance can be increased by more than a factor of 50. 2402 2403 Collective on MPI_Comm 2404 2405 Input Parameters: 2406 + comm - MPI communicator 2407 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2408 This value should be the same as the local size used in creating the 2409 y vector for the matrix-vector product y = Ax. 2410 . n - This value should be the same as the local size used in creating the 2411 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 2412 calculated if N is given). For square matrices n is almost always m. 2413 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2414 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2415 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2416 (same value is used for all local rows) 2417 . d_nnz - array containing the number of nonzeros in the various rows of the 2418 DIAGONAL portion of the local submatrix (possibly different for each row) 2419 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2420 The size of this array is equal to the number of local rows, i.e. 'm'. 2421 You must leave room for the diagonal entry even if it is zero. 2422 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2423 submatrix (same value is used for all local rows). 2424 - o_nnz - array containing the number of nonzeros in the various rows of the 2425 OFF-DIAGONAL portion of the local submatrix (possibly different for 2426 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2427 structure. The size of this array is equal to the number 2428 of local rows, i.e. 'm'. 2429 2430 Output Parameter: 2431 . A - the matrix 2432 2433 Notes: 2434 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2435 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2436 storage requirements for this matrix. 2437 2438 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2439 processor then it must be used on all processors that share the object for 2440 that argument. 2441 2442 The AIJ format (also called the Yale sparse matrix format or 2443 compressed row storage) is fully compatible with standard Fortran 77 2444 storage. That is, the stored row and column indices can begin at 2445 either one (as in Fortran) or zero. See the users manual for details. 2446 2447 The user MUST specify either the local or global matrix dimensions 2448 (possibly both).
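   For instance (a sketch only), either of the following calls is legal; in the
   first the global sizes are derived from the local ones, in the second PETSc
   chooses the local sizes:
.vb
   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
   ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve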
2449 2450 The parallel matrix is partitioned such that the first m0 rows belong to 2451 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2452 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2453 2454 The DIAGONAL portion of the local submatrix of a processor can be defined 2455 as the submatrix which is obtained by extracting the part corresponding 2456 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2457 first row that belongs to the processor, and r2 is the last row belonging 2458 to this processor. This is a square mxm matrix. The remaining portion 2459 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2460 2461 If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored. 2462 2463 When calling this routine with a single process communicator, a matrix of 2464 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2465 type of communicator, use the construction mechanism: 2466 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2467 2468 By default, this format uses inodes (identical nodes) when possible. 2469 We search for consecutive rows with the same nonzero structure, thereby 2470 reusing matrix information to achieve increased efficiency. 2471 2472 Options Database Keys: 2473 + -mat_aij_no_inode - Do not use inodes 2474 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2475 - -mat_aij_oneindex - Internally use indexing starting at 1 2476 rather than 0. Note that when calling MatSetValues(), 2477 the user still MUST index entries starting at 0! 2478 2479 2480 Example usage: 2481 2482 Consider the following 8x8 matrix with 34 non-zero values, that is 2483 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2484 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2485 as follows: 2486 2487 .vb 2488 1 2 0 | 0 3 0 | 0 4 2489 Proc0 0 5 6 | 7 0 0 | 8 0 2490 9 0 10 | 11 0 0 | 12 0 2491 ------------------------------------- 2492 13 0 14 | 15 16 17 | 0 0 2493 Proc1 0 18 0 | 19 20 21 | 0 0 2494 0 0 0 | 22 23 0 | 24 0 2495 ------------------------------------- 2496 Proc2 25 26 27 | 0 0 28 | 29 0 2497 30 0 0 | 31 32 33 | 0 34 2498 .ve 2499 2500 This can be represented as a collection of submatrices as: 2501 2502 .vb 2503 A B C 2504 D E F 2505 G H I 2506 .ve 2507 2508 Where the submatrices A,B,C are owned by proc0, D,E,F are 2509 owned by proc1, G,H,I are owned by proc2. 2510 2511 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2512 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2513 The 'M','N' parameters are 8,8, and have the same values on all procs. 2514 2515 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2516 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2517 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2518 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2519 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2520 matrix, and [DF] as another SeqAIJ matrix. 2521 2522 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2523 allocated for every row of the local diagonal submatrix, and o_nz 2524 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2525 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 2526 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
In this case, the values of d_nz,o_nz are: 2528 .vb 2529 proc0 : d_nz = 2, o_nz = 2 2530 proc1 : d_nz = 3, o_nz = 2 2531 proc2 : d_nz = 1, o_nz = 4 2532 .ve 2533 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2534 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2535 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2536 34 values. 2537 2538 When the d_nnz, o_nnz parameters are specified, the storage is specified 2539 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2540 In the above case the values for d_nnz,o_nnz are: 2541 .vb 2542 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2543 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2544 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2545 .ve 2546 Here the space allocated is the sum of all the above values, i.e. 34, and 2547 hence preallocation is perfect. 2548 2549 Level: intermediate 2550 2551 .keywords: matrix, aij, compressed row, sparse, parallel 2552 2553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2554 @*/ 2555 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2556 { 2557 int ierr,size; 2558 2559 PetscFunctionBegin; 2560 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2561 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2562 if (size > 1) { 2563 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2564 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2565 } else { 2566 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2567 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2568 } 2569 PetscFunctionReturn(0); 2570 } 2571 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2574 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2575 { 2576 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2577 PetscFunctionBegin; 2578 *Ad = a->A; 2579 *Ao = a->B; 2580 *colmap = a->garray; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2586 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2587 { 2588 int ierr,i; 2589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2590 2591 PetscFunctionBegin; 2592 if (coloring->ctype == IS_COLORING_LOCAL) { 2593 ISColoringValue *allcolors,*colors; 2594 ISColoring ocoloring; 2595 2596 /* set coloring for diagonal portion */ 2597 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2598 2599 /* set coloring for off-diagonal portion */ 2600 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2601 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2602 for (i=0; i<a->B->n; i++) { 2603 colors[i] = allcolors[a->garray[i]]; 2604 } 2605 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2606 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2607 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2608 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2609 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2610 ISColoringValue *colors; 2611 int *larray; 2612 ISColoring ocoloring; 2613 2614 /* set coloring for diagonal portion */ 2615 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2616 for (i=0; i<a->A->n; i++) { 2617 larray[i] = i + a->cstart; 2618 } 2619 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2620 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2621 for (i=0; i<a->A->n; i++) { 2622 colors[i] = coloring->colors[larray[i]]; 2623 } 2624 ierr = PetscFree(larray);CHKERRQ(ierr); 2625 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2626 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2627 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2628 2629 /* set coloring for off-diagonal portion */ 2630 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2631 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2632 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2633 for (i=0; i<a->B->n; i++) { 2634 colors[i] = coloring->colors[larray[i]]; 2635 } 2636 ierr = PetscFree(larray);CHKERRQ(ierr); 2637 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2638 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2639 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2640 } else { 2641 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype); 2642 } 2643 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2649 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2650 { 2651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2652 int ierr; 2653 2654 PetscFunctionBegin; 2655 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2656 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2657 PetscFunctionReturn(0); 2658 } 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2662 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2663 { 2664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2665 int ierr; 2666 2667 PetscFunctionBegin; 2668 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2669 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 2673 #undef __FUNCT__ 2674 #define __FUNCT__ "MatMerge" 2675 /*@C 2676 MatMerge - Creates a single large PETSc matrix by concatenating the sequential 2677 matrices from each processor 2678 2679 Collective on MPI_Comm 2680 2681 Input Parameters: 2682 + comm - the communicator the parallel matrix will live on 2683 - inmat - the input sequential matrix (one per process) 2684 2685 Output Parameter: 2686 . outmat - the parallel matrix generated 2687 2688 Level: advanced 2689 2690 Notes: The number of columns of the sequential matrix on EACH process 2691 MUST be the same. The input matrix inmat is destroyed once the merged matrix has been formed.
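   A minimal usage sketch (Aloc is an illustrative name for the sequential
   matrix each process has already built; all Aloc must have the same number
   of columns):
.vb
   ierr = MatMerge(PETSC_COMM_WORLD,Aloc,&A);CHKERRQ(ierr);
.ve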
2692 2693 @*/ 2694 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2695 { 2696 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2697 PetscScalar *values; 2698 PetscMap columnmap,rowmap; 2699 2700 PetscFunctionBegin; 2701 2702 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2703 2704 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2705 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2706 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2707 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2708 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2709 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2710 2711 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2712 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2713 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2714 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2715 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2716 2717 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2718 for (i=0;i<m;i++) { 2719 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2720 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2721 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2722 } 2723 /* This routine will ONLY return MPIAIJ type matrix */ 2724 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2725 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2726 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2727 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2728 2729 for (i=0;i<m;i++) { 2730 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2731 I = i + rstart; 2732 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2733 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2734 } 2735 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2736 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2737 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2738 2739 PetscFunctionReturn(0); 2740 } 2741 2742 #undef __FUNCT__ 2743 #define __FUNCT__ "MatFileSplit" 2744 int MatFileSplit(Mat A,char *outfile) 2745 { 2746 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2747 PetscViewer out; 2748 char *name; 2749 Mat B; 2750 PetscScalar *values; 2751 2752 PetscFunctionBegin; 2753 2754 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2755 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2756 /* Should this be the type of the diagonal block of A? 
*/ 2757 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2758 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2759 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2760 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2761 for (i=0;i<m;i++) { 2762 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2763 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2764 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2765 } 2766 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2767 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768 2769 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2770 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2771 /* leave room for the ".<rank>" suffix and the null terminator */ 2772 ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); 2772 sprintf(name,"%s.%d",outfile,rank); 2773 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2774 ierr = PetscFree(name);CHKERRQ(ierr); 2775 ierr = MatView(B,out);CHKERRQ(ierr); 2776 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2777 ierr = MatDestroy(B);CHKERRQ(ierr); 2778 PetscFunctionReturn(0); 2779 } 2780
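/*
   Usage sketch for MatFileSplit(); the base name "basename" is illustrative.
   Each process writes its local rows of A as a sequential matrix, in PETSc
   binary format, to the file basename.<rank>:

      ierr = MatFileSplit(A,"basename");CHKERRQ(ierr);

   The resulting files basename.0, basename.1, ... can later be read back
   individually through a PETSc binary viewer.
*/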