#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
  at a slightly higher hash-table cost; without it, it is not scalable (each
  processor holds an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
a->maxnz += CHUNKSIZE; \ 95 a->reallocs++; \ 96 } \ 97 N = nrow++ - 1; a->nz++; \ 98 /* shift up all the later entries in this row */ \ 99 for (ii=N; ii>=_i; ii--) { \ 100 rp[ii+1] = rp[ii]; \ 101 ap[ii+1] = ap[ii]; \ 102 } \ 103 rp[_i] = col1; \ 104 ap[_i] = value; \ 105 a_noinsert: ; \ 106 ailen[row] = nrow; \ 107 } 108 109 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 110 { \ 111 \ 112 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 113 rmax = bimax[row]; nrow = bilen[row]; \ 114 col1 = col - shift; \ 115 \ 116 low = 0; high = nrow; \ 117 while (high-low > 5) { \ 118 t = (low+high)/2; \ 119 if (rp[t] > col) high = t; \ 120 else low = t; \ 121 } \ 122 for (_i=low; _i<high; _i++) { \ 123 if (rp[_i] > col1) break; \ 124 if (rp[_i] == col1) { \ 125 if (addv == ADD_VALUES) ap[_i] += value; \ 126 else ap[_i] = value; \ 127 goto b_noinsert; \ 128 } \ 129 } \ 130 if (nonew == 1) goto b_noinsert; \ 131 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 132 if (nrow >= rmax) { \ 133 /* there is no extra room in row, therefore enlarge */ \ 134 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 135 PetscScalar *new_a; \ 136 \ 137 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 138 \ 139 /* malloc new storage space */ \ 140 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 141 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 142 new_j = (int*)(new_a + new_nz); \ 143 new_i = new_j + new_nz; \ 144 \ 145 /* copy over old data into new slots */ \ 146 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 147 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 148 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 149 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 150 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 151 len*sizeof(int));CHKERRQ(ierr); \ 152 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 154 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 155 /* free up old matrix storage */ \ 156 \ 157 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 158 if (!b->singlemalloc) { \ 159 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 160 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 161 } \ 162 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 163 b->singlemalloc = PETSC_TRUE; \ 164 \ 165 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 166 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 167 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 168 b->maxnz += CHUNKSIZE; \ 169 b->reallocs++; \ 170 } \ 171 N = nrow++ - 1; b->nz++; \ 172 /* shift up all the later entries in this row */ \ 173 for (ii=N; ii>=_i; ii--) { \ 174 rp[ii+1] = rp[ii]; \ 175 ap[ii+1] = ap[ii]; \ 176 } \ 177 rp[_i] = col1; \ 178 ap[_i] = value; \ 179 b_noinsert: ; \ 180 bilen[row] = nrow; \ 181 } 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "MatSetValues_MPIAIJ" 185 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscScalar value; 189 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 190 int cstart = aij->cstart,cend = aij->cend,row,col; 191 PetscTruth roworiented = aij->roworiented; 192 193 /* Some Variables required in the macro */ 194 Mat A = 
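/*
   Usage sketch for the MatSetValues_MPIAIJ() path implemented here (illustration only,
   not part of the library source): each process passes GLOBAL indices; values whose
   columns lie in the local diagonal block go into aij->A, values in locally owned rows
   but off-process columns go into aij->B, and values in off-process rows are stashed
   until assembly.  The matrix size (100x100), the preallocation figures (5 and 2), and
   the choice of column are hypothetical.

     Mat         Amat;
     PetscScalar one = 1.0;
     int         ierr,row,col,rstart,rend;

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&Amat);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(Amat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       col  = (row+1) % 100;
       ierr = MatSetValues(Amat,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(Amat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(Amat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/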
aij->A; 195 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 196 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 197 PetscScalar *aa = a->a; 198 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 199 Mat B = aij->B; 200 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 201 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 202 PetscScalar *ba = b->a; 203 204 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 205 int nonew = a->nonew,shift=0; 206 PetscScalar *ap; 207 208 PetscFunctionBegin; 209 for (i=0; i<m; i++) { 210 if (im[i] < 0) continue; 211 #if defined(PETSC_USE_BOPT_g) 212 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 213 #endif 214 if (im[i] >= rstart && im[i] < rend) { 215 row = im[i] - rstart; 216 for (j=0; j<n; j++) { 217 if (in[j] >= cstart && in[j] < cend){ 218 col = in[j] - cstart; 219 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 220 if (ignorezeroentries && value == 0.0) continue; 221 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 222 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 223 } else if (in[j] < 0) continue; 224 #if defined(PETSC_USE_BOPT_g) 225 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 226 #endif 227 else { 228 if (mat->was_assembled) { 229 if (!aij->colmap) { 230 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 231 } 232 #if defined (PETSC_USE_CTABLE) 233 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 234 col--; 235 #else 236 col = aij->colmap[in[j]] - 1; 237 #endif 238 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 239 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 240 col = in[j]; 241 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 242 B = aij->B; 243 b = (Mat_SeqAIJ*)B->data; 244 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 245 ba = b->a; 246 } 247 } else col = in[j]; 248 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 249 if (ignorezeroentries && value == 0.0) continue; 250 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 251 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 252 } 253 } 254 } else { 255 if (!aij->donotstash) { 256 if (roworiented) { 257 if (ignorezeroentries && v[i*n] == 0.0) continue; 258 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 259 } else { 260 if (ignorezeroentries && v[i] == 0.0) continue; 261 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 262 } 263 } 264 } 265 } 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatGetValues_MPIAIJ" 271 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 274 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 275 int cstart = aij->cstart,cend = aij->cend,row,col; 276 277 PetscFunctionBegin; 278 for (i=0; i<m; i++) { 279 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 280 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 281 if (idxm[i] >= rstart && idxm[i] < rend) { 282 row = idxm[i] - rstart; 283 for (j=0; j<n; j++) { 284 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 285 if (idxn[j] >= mat->N) 
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 286 if (idxn[j] >= cstart && idxn[j] < cend){ 287 col = idxn[j] - cstart; 288 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 289 } else { 290 if (!aij->colmap) { 291 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 292 } 293 #if defined (PETSC_USE_CTABLE) 294 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 295 col --; 296 #else 297 col = aij->colmap[idxn[j]] - 1; 298 #endif 299 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 300 else { 301 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 302 } 303 } 304 } 305 } else { 306 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 307 } 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 314 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 315 { 316 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 317 int ierr,nstash,reallocs; 318 InsertMode addv; 319 320 PetscFunctionBegin; 321 if (aij->donotstash) { 322 PetscFunctionReturn(0); 323 } 324 325 /* make sure all processors are either in INSERTMODE or ADDMODE */ 326 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 327 if (addv == (ADD_VALUES|INSERT_VALUES)) { 328 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 329 } 330 mat->insertmode = addv; /* in case this processor had no cache */ 331 332 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 333 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 334 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 335 PetscFunctionReturn(0); 336 } 337 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 341 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 342 { 343 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 344 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 345 int i,j,rstart,ncols,n,ierr,flg; 346 int *row,*col,other_disassembled; 347 PetscScalar *val; 348 InsertMode addv = mat->insertmode; 349 350 PetscFunctionBegin; 351 if (!aij->donotstash) { 352 while (1) { 353 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 354 if (!flg) break; 355 356 for (i=0; i<n;) { 357 /* Now identify the consecutive vals belonging to the same row */ 358 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 359 if (j < n) ncols = j-i; 360 else ncols = n-i; 361 /* Now assemble all these values with a single function call */ 362 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 363 i = j; 364 } 365 } 366 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 367 } 368 369 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 370 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 371 372 /* determine if any processor has disassembled, if so we must 373 also disassemble ourselfs, in order that we may reassemble. 
*/ 374 /* 375 if nonzero structure of submatrix B cannot change then we know that 376 no processor disassembled thus we can skip this stuff 377 */ 378 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 379 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 380 if (mat->was_assembled && !other_disassembled) { 381 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 382 } 383 } 384 385 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 386 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 389 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 390 391 if (aij->rowvalues) { 392 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 393 aij->rowvalues = 0; 394 } 395 396 /* used by MatAXPY() */ 397 a->xtoy = 0; b->xtoy = 0; 398 a->XtoY = 0; b->XtoY = 0; 399 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 405 int MatZeroEntries_MPIAIJ(Mat A) 406 { 407 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 408 int ierr; 409 410 PetscFunctionBegin; 411 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 412 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatZeroRows_MPIAIJ" 418 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 419 { 420 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 421 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 422 int *nprocs,j,idx,nsends,row; 423 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 424 int *rvalues,tag = A->tag,count,base,slen,n,*source; 425 int *lens,imdex,*lrows,*values,rstart=l->rstart; 426 MPI_Comm comm = A->comm; 427 MPI_Request *send_waits,*recv_waits; 428 MPI_Status recv_status,*send_status; 429 IS istmp; 430 PetscTruth found; 431 432 PetscFunctionBegin; 433 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 434 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 435 436 /* first count number of contributors to each processor */ 437 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 438 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 439 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 440 for (i=0; i<N; i++) { 441 idx = rows[i]; 442 found = PETSC_FALSE; 443 for (j=0; j<size; j++) { 444 if (idx >= owners[j] && idx < owners[j+1]) { 445 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 446 } 447 } 448 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 449 } 450 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 451 452 /* inform other processors of number of messages and max length*/ 453 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 454 455 /* post receives: */ 456 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 457 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 458 for (i=0; i<nrecvs; i++) { 459 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 460 } 461 462 /* do sends: 463 1) starts[i] gives the starting index in svalues for stuff going to 464 the ith processor 465 */ 466 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 467 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 468 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 469 starts[0] = 0; 470 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 471 for (i=0; i<N; i++) { 472 svalues[starts[owner[i]]++] 
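/*
   Illustration (hypothetical data) of the counting pass earlier in MatZeroRows_MPIAIJ():
   nprocs[2*j] accumulates how many of the requested rows must be forwarded to process j,
   nprocs[2*j+1] is a 0/1 flag marking that a message to j is needed, and owner[i] records
   the destination of rows[i].  For example, with size = 2, ownership ranges [0,8) and
   [8,16), and local requests rows = {1,9,11}:

     owner  = {0,1,1}
     nprocs = {1,1, 2,1}   so   nsends = nprocs[1] + nprocs[3] = 2

   PetscMaxSum(), as used here, then gives every process the length of the longest message
   it will receive (nmax) and the number of such messages (nrecvs), which were used above
   to size and post the MPI_Irecv buffers.
*/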
                              = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry;
     that would allocate new memory each time, causing many mallocs and
     slowing things down.
526 527 Contributed by: Mathew Knepley 528 */ 529 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 530 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 531 if (diag && (l->A->M == l->A->N)) { 532 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 533 } else if (diag) { 534 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 535 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 536 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 537 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 538 } 539 for (i = 0; i < slen; i++) { 540 row = lrows[i] + rstart; 541 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 542 } 543 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 544 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 } else { 546 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 547 } 548 ierr = ISDestroy(istmp);CHKERRQ(ierr); 549 ierr = PetscFree(lrows);CHKERRQ(ierr); 550 551 /* wait on sends */ 552 if (nsends) { 553 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 554 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 555 ierr = PetscFree(send_status);CHKERRQ(ierr); 556 } 557 ierr = PetscFree(send_waits);CHKERRQ(ierr); 558 ierr = PetscFree(svalues);CHKERRQ(ierr); 559 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatMult_MPIAIJ" 565 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 566 { 567 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 568 int ierr,nt; 569 570 PetscFunctionBegin; 571 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 572 if (nt != A->n) { 573 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 574 } 575 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 576 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 577 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 578 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatMultAdd_MPIAIJ" 584 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 585 { 586 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 587 int ierr; 588 589 PetscFunctionBegin; 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 599 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 int ierr; 603 604 PetscFunctionBegin; 605 /* do nondiagonal part */ 606 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 607 /* send it on its way */ 608 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 609 /* do local part */ 610 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 611 /* receive remote parts: note this assumes the values are not actually */ 612 /* inserted in yy until the next line, which is true for my implementation*/ 613 /* but is not perhaps always true. 
*/ 614 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 EXTERN_C_BEGIN 619 #undef __FUNCT__ 620 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 621 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 622 { 623 MPI_Comm comm; 624 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 625 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 626 IS Me,Notme; 627 int M,N,first,last,*notme,ntids,i, ierr; 628 629 PetscFunctionBegin; 630 631 /* Easy test: symmetric diagonal block */ 632 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 633 ierr = MatIsTranspose(Adia,Bdia,f);CHKERRQ(ierr); 634 if (!*f) PetscFunctionReturn(0); 635 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 636 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 637 if (ntids==1) PetscFunctionReturn(0); 638 639 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 640 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 641 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 642 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 643 for (i=0; i<first; i++) notme[i] = i; 644 for (i=last; i<M; i++) notme[i-last+first] = i; 645 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 646 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 647 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 648 Aoff = Aoffs[0]; 649 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 650 Boff = Boffs[0]; 651 ierr = MatIsTranspose(Aoff,Boff,f);CHKERRQ(ierr); 652 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 653 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 654 ierr = ISDestroy(Me);CHKERRQ(ierr); 655 ierr = ISDestroy(Notme);CHKERRQ(ierr); 656 657 PetscFunctionReturn(0); 658 } 659 EXTERN_C_END 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 663 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 664 { 665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 666 int ierr; 667 668 PetscFunctionBegin; 669 /* do nondiagonal part */ 670 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 671 /* send it on its way */ 672 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 673 /* do local part */ 674 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 675 /* receive remote parts: note this assumes the values are not actually */ 676 /* inserted in yy until the next line, which is true for my implementation*/ 677 /* but is not perhaps always true. 
*/ 678 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 /* 683 This only works correctly for square matrices where the subblock A->A is the 684 diagonal block 685 */ 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 688 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 689 { 690 int ierr; 691 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 692 693 PetscFunctionBegin; 694 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 695 if (a->rstart != a->cstart || a->rend != a->cend) { 696 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 697 } 698 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatScale_MPIAIJ" 704 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 705 { 706 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 707 int ierr; 708 709 PetscFunctionBegin; 710 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 711 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatDestroy_MPIAIJ" 717 int MatDestroy_MPIAIJ(Mat mat) 718 { 719 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 720 int ierr; 721 722 PetscFunctionBegin; 723 #if defined(PETSC_USE_LOG) 724 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 725 #endif 726 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 727 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 728 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 729 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 730 #if defined (PETSC_USE_CTABLE) 731 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 732 #else 733 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 734 #endif 735 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 736 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 737 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 738 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 739 ierr = PetscFree(aij);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatView_MPIAIJ_Binary" 745 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 746 { 747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 748 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 749 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 750 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 751 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (rank == 0) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->M; 761 header[2] = mat->N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->m; 767 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 768 for (i=1; i<size; i++) { 769 rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 rlen = mat->m; 774 } 775 776 /* load up the local row counts */ 777 ierr = 
PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
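/*
   Summary of the file layout produced above (all items written with PetscBinaryWrite()):

     header[4]       MAT_FILE_COOKIE, M (global rows), N (global cols), global nonzero count
     row lengths     M ints, one per global row
     column indices  nz ints, global column numbers, row by row in ascending order
     values          nz PetscScalars, matching the column indices

   Rank 0 writes its own data and then receives and writes each remaining process's
   contribution in rank order, so the result is the standard PETSc binary matrix format
   that MatLoad() expects.
*/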
PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 int ierr,rank = aij->rank,size = aij->size; 872 PetscTruth isdraw,isascii,flg,isbinary; 873 PetscViewer sviewer; 874 PetscViewerFormat format; 875 876 PetscFunctionBegin; 877 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 880 if (isascii) { 881 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 882 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 883 MatInfo info; 884 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 885 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 886 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 887 if (flg) { 888 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 889 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 890 } else { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 892 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 893 } 894 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 895 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 896 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 898 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 899 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } else if (format == PETSC_VIEWER_ASCII_INFO) { 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 904 PetscFunctionReturn(0); 905 } 906 } else if (isbinary) { 907 if (size == 1) { 908 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 909 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 910 } else { 911 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (isdraw) { 915 PetscDraw draw; 916 PetscTruth isnull; 917 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 918 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 919 } 920 921 if (size == 1) { 922 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 923 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 924 } else { 925 /* assemble the entire matrix onto first processor. 
*/ 926 Mat A; 927 Mat_SeqAIJ *Aloc; 928 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 929 PetscScalar *a; 930 931 if (!rank) { 932 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 933 } else { 934 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 935 } 936 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 937 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 938 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 939 PetscLogObjectParent(mat,A); 940 941 /* copy over the A part */ 942 Aloc = (Mat_SeqAIJ*)aij->A->data; 943 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 944 row = aij->rstart; 945 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 946 for (i=0; i<m; i++) { 947 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 948 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 949 } 950 aj = Aloc->j; 951 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 952 953 /* copy over the B part */ 954 Aloc = (Mat_SeqAIJ*)aij->B->data; 955 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = aij->rstart; 957 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 958 ct = cols; 959 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 960 for (i=0; i<m; i++) { 961 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 962 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 963 } 964 ierr = PetscFree(ct);CHKERRQ(ierr); 965 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 /* 968 Everyone has to call to draw the matrix since the graphics waits are 969 synchronized across all processors that share the PetscDraw object 970 */ 971 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 972 if (!rank) { 973 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 974 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 975 } 976 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 977 ierr = MatDestroy(A);CHKERRQ(ierr); 978 } 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "MatView_MPIAIJ" 984 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 985 { 986 int ierr; 987 PetscTruth isascii,isdraw,issocket,isbinary; 988 989 PetscFunctionBegin; 990 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 992 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 993 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 994 if (isascii || isdraw || isbinary || issocket) { 995 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 996 } else { 997 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 998 } 999 PetscFunctionReturn(0); 1000 } 1001 1002 1003 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatRelax_MPIAIJ" 1006 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1007 { 1008 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1009 int ierr; 1010 Vec bb1; 1011 PetscScalar mone=-1.0; 1012 1013 PetscFunctionBegin; 1014 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1015 1016 ierr = 
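/*
   Note on the local relaxation below (a restatement of what the code does): with the
   MPIAIJ row-block splitting, the locally owned equations are

       A_d * x_local + B_o * x_ghost = b_local

   where A_d = mat->A is the diagonal block and B_o = mat->B holds the off-process
   columns.  Each outer iteration scatters the current x into mat->lvec (the ghost
   values), forms bb1 = bb - B_o*x_ghost (implemented by scaling lvec by -1 and calling
   the block's multadd), and then runs a purely local sequential SOR sweep on
   A_d * x_local = bb1.
*/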
VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1017 1018 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1019 if (flag & SOR_ZERO_INITIAL_GUESS) { 1020 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1021 its--; 1022 } 1023 1024 while (its--) { 1025 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1026 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1027 1028 /* update rhs: bb1 = bb - B*x */ 1029 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1030 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1031 1032 /* local sweep */ 1033 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1034 CHKERRQ(ierr); 1035 } 1036 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1037 if (flag & SOR_ZERO_INITIAL_GUESS) { 1038 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1039 its--; 1040 } 1041 while (its--) { 1042 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1043 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1044 1045 /* update rhs: bb1 = bb - B*x */ 1046 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1047 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1048 1049 /* local sweep */ 1050 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1051 CHKERRQ(ierr); 1052 } 1053 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1054 if (flag & SOR_ZERO_INITIAL_GUESS) { 1055 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1056 its--; 1057 } 1058 while (its--) { 1059 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1060 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1061 1062 /* update rhs: bb1 = bb - B*x */ 1063 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1064 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1065 1066 /* local sweep */ 1067 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1068 CHKERRQ(ierr); 1069 } 1070 } else { 1071 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1072 } 1073 1074 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1080 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1081 { 1082 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1083 Mat A = mat->A,B = mat->B; 1084 int ierr; 1085 PetscReal isend[5],irecv[5]; 1086 1087 PetscFunctionBegin; 1088 info->block_size = 1.0; 1089 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1090 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1091 isend[3] = info->memory; isend[4] = info->mallocs; 1092 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1093 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1094 isend[3] += info->memory; isend[4] += info->mallocs; 1095 if (flag == MAT_LOCAL) { 1096 info->nz_used = isend[0]; 1097 info->nz_allocated = isend[1]; 1098 info->nz_unneeded = isend[2]; 1099 info->memory = isend[3]; 1100 info->mallocs = isend[4]; 1101 } else if (flag == MAT_GLOBAL_MAX) { 1102 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1103 info->nz_used 
= irecv[0]; 1104 info->nz_allocated = irecv[1]; 1105 info->nz_unneeded = irecv[2]; 1106 info->memory = irecv[3]; 1107 info->mallocs = irecv[4]; 1108 } else if (flag == MAT_GLOBAL_SUM) { 1109 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1110 info->nz_used = irecv[0]; 1111 info->nz_allocated = irecv[1]; 1112 info->nz_unneeded = irecv[2]; 1113 info->memory = irecv[3]; 1114 info->mallocs = irecv[4]; 1115 } 1116 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1117 info->fill_ratio_needed = 0; 1118 info->factor_mallocs = 0; 1119 info->rows_global = (double)matin->M; 1120 info->columns_global = (double)matin->N; 1121 info->rows_local = (double)matin->m; 1122 info->columns_local = (double)matin->N; 1123 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatSetOption_MPIAIJ" 1129 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1130 { 1131 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1132 int ierr; 1133 1134 PetscFunctionBegin; 1135 switch (op) { 1136 case MAT_NO_NEW_NONZERO_LOCATIONS: 1137 case MAT_YES_NEW_NONZERO_LOCATIONS: 1138 case MAT_COLUMNS_UNSORTED: 1139 case MAT_COLUMNS_SORTED: 1140 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1141 case MAT_KEEP_ZEROED_ROWS: 1142 case MAT_NEW_NONZERO_LOCATION_ERR: 1143 case MAT_USE_INODES: 1144 case MAT_DO_NOT_USE_INODES: 1145 case MAT_IGNORE_ZERO_ENTRIES: 1146 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1147 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1148 break; 1149 case MAT_ROW_ORIENTED: 1150 a->roworiented = PETSC_TRUE; 1151 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1152 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1153 break; 1154 case MAT_ROWS_SORTED: 1155 case MAT_ROWS_UNSORTED: 1156 case MAT_YES_NEW_DIAGONALS: 1157 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1158 break; 1159 case MAT_COLUMN_ORIENTED: 1160 a->roworiented = PETSC_FALSE; 1161 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1162 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1163 break; 1164 case MAT_IGNORE_OFF_PROC_ENTRIES: 1165 a->donotstash = PETSC_TRUE; 1166 break; 1167 case MAT_NO_NEW_DIAGONALS: 1168 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1169 case MAT_SYMMETRIC: 1170 case MAT_STRUCTURALLY_SYMMETRIC: 1171 case MAT_HERMITIAN: 1172 case MAT_SYMMETRY_ETERNAL: 1173 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1174 break; 1175 case MAT_NOT_SYMMETRIC: 1176 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1177 case MAT_NOT_HERMITIAN: 1178 case MAT_NOT_SYMMETRY_ETERNAL: 1179 break; 1180 default: 1181 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatGetRow_MPIAIJ" 1188 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1189 { 1190 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1191 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1192 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1193 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1194 int *cmap,*idx_p; 1195 1196 PetscFunctionBegin; 1197 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1198 mat->getrowactive = PETSC_TRUE; 1199 1200 if (!mat->rowvalues && (idx || v)) { 1201 /* 1202 allocate enough space to hold information from the longest row. 
1203 */ 1204 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1205 int max = 1,tmp; 1206 for (i=0; i<matin->m; i++) { 1207 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1208 if (max < tmp) { max = tmp; } 1209 } 1210 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1211 mat->rowindices = (int*)(mat->rowvalues + max); 1212 } 1213 1214 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1215 lrow = row - rstart; 1216 1217 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1218 if (!v) {pvA = 0; pvB = 0;} 1219 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1220 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1221 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1222 nztot = nzA + nzB; 1223 1224 cmap = mat->garray; 1225 if (v || idx) { 1226 if (nztot) { 1227 /* Sort by increasing column numbers, assuming A and B already sorted */ 1228 int imark = -1; 1229 if (v) { 1230 *v = v_p = mat->rowvalues; 1231 for (i=0; i<nzB; i++) { 1232 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1233 else break; 1234 } 1235 imark = i; 1236 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1237 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1238 } 1239 if (idx) { 1240 *idx = idx_p = mat->rowindices; 1241 if (imark > -1) { 1242 for (i=0; i<imark; i++) { 1243 idx_p[i] = cmap[cworkB[i]]; 1244 } 1245 } else { 1246 for (i=0; i<nzB; i++) { 1247 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1248 else break; 1249 } 1250 imark = i; 1251 } 1252 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1253 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1254 } 1255 } else { 1256 if (idx) *idx = 0; 1257 if (v) *v = 0; 1258 } 1259 } 1260 *nz = nztot; 1261 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1262 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1268 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1269 { 1270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1271 1272 PetscFunctionBegin; 1273 if (aij->getrowactive == PETSC_FALSE) { 1274 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1275 } 1276 aij->getrowactive = PETSC_FALSE; 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatNorm_MPIAIJ" 1282 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1283 { 1284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1285 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1286 int ierr,i,j,cstart = aij->cstart; 1287 PetscReal sum = 0.0; 1288 PetscScalar *v; 1289 1290 PetscFunctionBegin; 1291 if (aij->size == 1) { 1292 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1293 } else { 1294 if (type == NORM_FROBENIUS) { 1295 v = amat->a; 1296 for (i=0; i<amat->nz; i++) { 1297 #if defined(PETSC_USE_COMPLEX) 1298 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1299 #else 1300 sum += (*v)*(*v); v++; 1301 #endif 1302 } 1303 v = bmat->a; 1304 for (i=0; i<bmat->nz; i++) { 1305 #if defined(PETSC_USE_COMPLEX) 1306 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1307 #else 1308 sum += (*v)*(*v); v++; 1309 #endif 1310 } 1311 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1312 *norm = sqrt(*norm); 1313 } else if (type == NORM_1) { /* max column norm */ 1314 PetscReal *tmp,*tmp2; 1315 int 
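/*
   Worked note for the NORM_1 branch below (a restatement of the algorithm): the 1-norm
   is the maximum column sum, max_j sum_i |a_ij|.  Each process accumulates |a_ij| over
   its local rows into a length-N array indexed by GLOBAL column - at cstart + amat->j[k]
   for diagonal-block entries and at garray[bmat->j[k]] for off-diagonal entries.  An
   MPI_Allreduce with MPI_SUM forms the complete column sums on every process, and the
   largest of them is the norm.
*/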
*jj,*garray = aij->garray; 1316 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1317 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1318 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1319 *norm = 0.0; 1320 v = amat->a; jj = amat->j; 1321 for (j=0; j<amat->nz; j++) { 1322 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1323 } 1324 v = bmat->a; jj = bmat->j; 1325 for (j=0; j<bmat->nz; j++) { 1326 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1327 } 1328 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1329 for (j=0; j<mat->N; j++) { 1330 if (tmp2[j] > *norm) *norm = tmp2[j]; 1331 } 1332 ierr = PetscFree(tmp);CHKERRQ(ierr); 1333 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1334 } else if (type == NORM_INFINITY) { /* max row norm */ 1335 PetscReal ntemp = 0.0; 1336 for (j=0; j<aij->A->m; j++) { 1337 v = amat->a + amat->i[j]; 1338 sum = 0.0; 1339 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1340 sum += PetscAbsScalar(*v); v++; 1341 } 1342 v = bmat->a + bmat->i[j]; 1343 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1344 sum += PetscAbsScalar(*v); v++; 1345 } 1346 if (sum > ntemp) ntemp = sum; 1347 } 1348 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1349 } else { 1350 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1351 } 1352 } 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatTranspose_MPIAIJ" 1358 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1359 { 1360 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1361 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1362 int ierr; 1363 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1364 Mat B; 1365 PetscScalar *array; 1366 1367 PetscFunctionBegin; 1368 if (!matout && M != N) { 1369 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1370 } 1371 1372 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1373 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1374 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1375 1376 /* copy over the A part */ 1377 Aloc = (Mat_SeqAIJ*)a->A->data; 1378 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1379 row = a->rstart; 1380 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1381 for (i=0; i<m; i++) { 1382 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1383 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1384 } 1385 aj = Aloc->j; 1386 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1387 1388 /* copy over the B part */ 1389 Aloc = (Mat_SeqAIJ*)a->B->data; 1390 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1391 row = a->rstart; 1392 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1393 ct = cols; 1394 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1395 for (i=0; i<m; i++) { 1396 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1397 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1398 } 1399 ierr = PetscFree(ct);CHKERRQ(ierr); 1400 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1401 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1402 if (matout) { 1403 *matout = B; 1404 } else { 1405 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1412 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1413 { 1414 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1415 Mat a = aij->A,b = aij->B; 1416 int 
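/*
   Note on MatDiagonalScale_MPIAIJ() below (a restatement of the code): for L = diag(ll)
   and R = diag(rr) the routine forms L*mat*R while overlapping communication with
   computation.  It starts the scatter of rr into aij->lvec (the ghost-column entries of
   R), left-scales the off-diagonal block B by ll, scales the diagonal block A by ll and
   rr, and only after the scatter completes right-scales B by the gathered ghost values.
*/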
ierr,s1,s2,s3; 1417 1418 PetscFunctionBegin; 1419 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1420 if (rr) { 1421 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1422 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1423 /* Overlap communication with computation. */ 1424 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1425 } 1426 if (ll) { 1427 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1428 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1429 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1430 } 1431 /* scale the diagonal block */ 1432 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1433 1434 if (rr) { 1435 /* Do a scatter end and then right scale the off-diagonal block */ 1436 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1437 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1438 } 1439 1440 PetscFunctionReturn(0); 1441 } 1442 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1446 int MatPrintHelp_MPIAIJ(Mat A) 1447 { 1448 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1449 int ierr; 1450 1451 PetscFunctionBegin; 1452 if (!a->rank) { 1453 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1460 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1461 { 1462 PetscFunctionBegin; 1463 *bs = 1; 1464 PetscFunctionReturn(0); 1465 } 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1468 int MatSetUnfactored_MPIAIJ(Mat A) 1469 { 1470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1471 int ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatEqual_MPIAIJ" 1480 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1481 { 1482 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1483 Mat a,b,c,d; 1484 PetscTruth flg; 1485 int ierr; 1486 1487 PetscFunctionBegin; 1488 a = matA->A; b = matA->B; 1489 c = matB->A; d = matB->B; 1490 1491 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1492 if (flg == PETSC_TRUE) { 1493 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1494 } 1495 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "MatCopy_MPIAIJ" 1501 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1502 { 1503 int ierr; 1504 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1505 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1506 1507 PetscFunctionBegin; 1508 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1509 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1510 /* because of the column compression in the off-processor part of the matrix a->B, 1511 the number of columns in a->B and b->B may be different, hence we cannot call 1512 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1513 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1514 then copying the submatrices */ 1515 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1516 } else { 1517 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1518 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1519 } 1520 PetscFunctionReturn(0); 1521 } 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1525 int MatSetUpPreallocation_MPIAIJ(Mat A) 1526 { 1527 int ierr; 1528 1529 PetscFunctionBegin; 1530 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #include "petscblaslapack.h" 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatAXPY_MPIAIJ" 1537 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1538 { 1539 int ierr,one=1,i; 1540 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1541 Mat_SeqAIJ *x,*y; 1542 1543 PetscFunctionBegin; 1544 if (str == SAME_NONZERO_PATTERN) { 1545 x = (Mat_SeqAIJ *)xx->A->data; 1546 y = (Mat_SeqAIJ *)yy->A->data; 1547 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1548 x = (Mat_SeqAIJ *)xx->B->data; 1549 y = (Mat_SeqAIJ *)yy->B->data; 1550 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1551 } else if (str == SUBSET_NONZERO_PATTERN) { 1552 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1553 1554 x = (Mat_SeqAIJ *)xx->B->data; 1555 y = (Mat_SeqAIJ *)yy->B->data; 1556 if (y->xtoy && y->XtoY != xx->B) { 1557 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1558 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1559 } 1560 if (!y->xtoy) { /* get xtoy */ 1561 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1562 y->XtoY = xx->B; 1563 } 1564 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1565 } else { 1566 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 /* -------------------------------------------------------------------*/ 1572 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1573 MatGetRow_MPIAIJ, 1574 MatRestoreRow_MPIAIJ, 1575 MatMult_MPIAIJ, 1576 /* 4*/ MatMultAdd_MPIAIJ, 1577 MatMultTranspose_MPIAIJ, 1578 MatMultTransposeAdd_MPIAIJ, 1579 0, 1580 0, 1581 0, 1582 /*10*/ 0, 1583 0, 1584 0, 1585 MatRelax_MPIAIJ, 1586 MatTranspose_MPIAIJ, 1587 /*15*/ MatGetInfo_MPIAIJ, 1588 MatEqual_MPIAIJ, 1589 MatGetDiagonal_MPIAIJ, 1590 MatDiagonalScale_MPIAIJ, 1591 MatNorm_MPIAIJ, 1592 /*20*/ MatAssemblyBegin_MPIAIJ, 1593 MatAssemblyEnd_MPIAIJ, 1594 0, 1595 MatSetOption_MPIAIJ, 1596 MatZeroEntries_MPIAIJ, 1597 /*25*/ MatZeroRows_MPIAIJ, 1598 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1599 MatLUFactorSymbolic_MPIAIJ_TFS, 1600 #else 1601 0, 1602 #endif 1603 0, 1604 0, 1605 0, 1606 /*30*/ MatSetUpPreallocation_MPIAIJ, 1607 0, 1608 0, 1609 0, 1610 0, 1611 /*35*/ MatDuplicate_MPIAIJ, 1612 0, 1613 0, 1614 0, 1615 0, 1616 /*40*/ MatAXPY_MPIAIJ, 1617 MatGetSubMatrices_MPIAIJ, 1618 MatIncreaseOverlap_MPIAIJ, 1619 MatGetValues_MPIAIJ, 1620 MatCopy_MPIAIJ, 1621 /*45*/ MatPrintHelp_MPIAIJ, 1622 MatScale_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 /*50*/ MatGetBlockSize_MPIAIJ, 1627 0, 1628 0, 1629 0, 1630 0, 1631 /*55*/ MatFDColoringCreate_MPIAIJ, 1632 0, 1633 MatSetUnfactored_MPIAIJ, 1634 0, 1635 0, 1636 /*60*/ MatGetSubMatrix_MPIAIJ, 1637 MatDestroy_MPIAIJ, 1638 MatView_MPIAIJ, 1639 MatGetPetscMaps_Petsc, 1640 0, 1641 /*65*/ 0, 1642 0, 1643 0, 1644 0, 1645 0, 1646 /*70*/ 0, 1647 0, 1648 
MatSetColoring_MPIAIJ, 1649 MatSetValuesAdic_MPIAIJ, 1650 MatSetValuesAdifor_MPIAIJ, 1651 /*75*/ 0, 1652 0, 1653 0, 1654 0, 1655 0, 1656 /*80*/ 0, 1657 0, 1658 0, 1659 0, 1660 /*85*/ MatLoad_MPIAIJ}; 1661 1662 /* ----------------------------------------------------------------------------------------*/ 1663 1664 EXTERN_C_BEGIN 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1667 int MatStoreValues_MPIAIJ(Mat mat) 1668 { 1669 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1670 int ierr; 1671 1672 PetscFunctionBegin; 1673 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1674 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 EXTERN_C_END 1678 1679 EXTERN_C_BEGIN 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1682 int MatRetrieveValues_MPIAIJ(Mat mat) 1683 { 1684 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1685 int ierr; 1686 1687 PetscFunctionBegin; 1688 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1689 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1690 PetscFunctionReturn(0); 1691 } 1692 EXTERN_C_END 1693 1694 #include "petscpc.h" 1695 EXTERN_C_BEGIN 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1698 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1699 { 1700 Mat_MPIAIJ *b; 1701 int ierr,i; 1702 1703 PetscFunctionBegin; 1704 B->preallocated = PETSC_TRUE; 1705 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1706 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1707 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1708 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1709 if (d_nnz) { 1710 for (i=0; i<B->m; i++) { 1711 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1712 } 1713 } 1714 if (o_nnz) { 1715 for (i=0; i<B->m; i++) { 1716 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1717 } 1718 } 1719 b = (Mat_MPIAIJ*)B->data; 1720 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1721 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1722 1723 PetscFunctionReturn(0); 1724 } 1725 EXTERN_C_END 1726 1727 /*MC 1728 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1729 1730 Options Database Keys: 1731 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1732 1733 Level: beginner 1734 1735 .seealso: MatCreateMPIAIJ 1736 M*/ 1737 1738 EXTERN_C_BEGIN 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatCreate_MPIAIJ" 1741 int MatCreate_MPIAIJ(Mat B) 1742 { 1743 Mat_MPIAIJ *b; 1744 int ierr,i,size; 1745 1746 PetscFunctionBegin; 1747 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1748 1749 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1750 B->data = (void*)b; 1751 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1752 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1753 B->factor = 0; 1754 B->assembled = PETSC_FALSE; 1755 B->mapping = 0; 1756 1757 B->insertmode = NOT_SET_VALUES; 1758 b->size = size; 1759 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1760 1761 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1762 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1763 1764 /* the information in the maps duplicates the information computed below, eventually 1765 we should remove the duplicate information that is not contained in the maps */ 1766 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1767 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1768 1769 /* build local table of row and column ownerships */ 1770 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1771 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1772 b->cowners = b->rowners + b->size + 2; 1773 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1774 b->rowners[0] = 0; 1775 for (i=2; i<=b->size; i++) { 1776 b->rowners[i] += b->rowners[i-1]; 1777 } 1778 b->rstart = b->rowners[b->rank]; 1779 b->rend = b->rowners[b->rank+1]; 1780 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1781 b->cowners[0] = 0; 1782 for (i=2; i<=b->size; i++) { 1783 b->cowners[i] += b->cowners[i-1]; 1784 } 1785 b->cstart = b->cowners[b->rank]; 1786 b->cend = b->cowners[b->rank+1]; 1787 1788 /* build cache for off array entries formed */ 1789 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1790 b->donotstash = PETSC_FALSE; 1791 b->colmap = 0; 1792 b->garray = 0; 1793 b->roworiented = PETSC_TRUE; 1794 1795 /* stuff used for matrix vector multiply */ 1796 b->lvec = PETSC_NULL; 1797 b->Mvctx = PETSC_NULL; 1798 1799 /* stuff for MatGetRow() */ 1800 b->rowindices = 0; 1801 b->rowvalues = 0; 1802 b->getrowactive = PETSC_FALSE; 1803 1804 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 1805 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1806 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1807 PetscLogObjectParent(B,b->A); 1808 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1809 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1810 PetscLogObjectParent(B,b->B); 1811 1812 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1813 "MatStoreValues_MPIAIJ", 1814 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1815 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1816 "MatRetrieveValues_MPIAIJ", 1817 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1818 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1819 "MatGetDiagonalBlock_MPIAIJ", 1820 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1821 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1822 "MatIsTranspose_MPIAIJ", 1823 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1824 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1825 "MatMPIAIJSetPreallocation_MPIAIJ", 1826 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1827 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1828 "MatDiagonalScaleLocal_MPIAIJ", 1829 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1830 PetscFunctionReturn(0); 1831 } 1832 EXTERN_C_END 1833 1834 /*MC 1835 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1836 1837 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1838 and MATMPIAIJ otherwise. As a result, for single process communicators, 1839 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1840 for communicators controlling multiple processes. It is recommended that you call both of 1841 the above preallocation routines for simplicity. 1842 1843 Options Database Keys: 1844 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1845 1846 Level: beginner 1847 1848 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1849 M*/ 1850 1851 EXTERN_C_BEGIN 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatCreate_AIJ" 1854 int MatCreate_AIJ(Mat A) { 1855 int ierr,size; 1856 1857 PetscFunctionBegin; 1858 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1859 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1860 if (size == 1) { 1861 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1862 } else { 1863 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1864 } 1865 PetscFunctionReturn(0); 1866 } 1867 EXTERN_C_END 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1871 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1872 { 1873 Mat mat; 1874 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1875 int ierr; 1876 1877 PetscFunctionBegin; 1878 *newmat = 0; 1879 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1880 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1881 a = (Mat_MPIAIJ*)mat->data; 1882 1883 mat->factor = matin->factor; 1884 mat->assembled = PETSC_TRUE; 1885 mat->insertmode = NOT_SET_VALUES; 1886 mat->preallocated = PETSC_TRUE; 1887 1888 a->rstart = oldmat->rstart; 1889 a->rend = oldmat->rend; 1890 a->cstart = oldmat->cstart; 1891 a->cend = oldmat->cend; 1892 a->size = oldmat->size; 1893 a->rank = oldmat->rank; 1894 a->donotstash = oldmat->donotstash; 1895 a->roworiented = oldmat->roworiented; 1896 a->rowindices = 0; 1897 a->rowvalues = 0; 1898 a->getrowactive = PETSC_FALSE; 1899 1900 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1901 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1902 if (oldmat->colmap) { 1903 #if defined (PETSC_USE_CTABLE) 1904 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1905 #else 1906 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1907 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1908 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1909 #endif 1910 } else a->colmap = 0; 1911 if (oldmat->garray) { 1912 int len; 1913 len = oldmat->B->n; 1914 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1915 PetscLogObjectMemory(mat,len*sizeof(int)); 1916 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1917 } else a->garray = 0; 1918 1919 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1920 PetscLogObjectParent(mat,a->lvec); 1921 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1922 PetscLogObjectParent(mat,a->Mvctx); 1923 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1924 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1925 PetscLogObjectParent(mat,a->A); 1926 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1927 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1928 PetscLogObjectParent(mat,a->B); 1929 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1930 *newmat = mat; 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #include "petscsys.h" 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatLoad_MPIAIJ" 1938 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1939 { 1940 Mat A; 1941 PetscScalar *vals,*svals; 1942 MPI_Comm comm = ((PetscObject)viewer)->comm; 1943 MPI_Status status; 1944 int i,nz,ierr,j,rstart,rend,fd; 1945 int header[4],rank,size,*rowlengths = 
0,M,N,m,*rowners,maxnz,*cols; 1946 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1947 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1948 1949 PetscFunctionBegin; 1950 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1951 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1952 if (!rank) { 1953 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1954 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1955 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1956 if (header[3] < 0) { 1957 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1958 } 1959 } 1960 1961 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1962 M = header[1]; N = header[2]; 1963 /* determine ownership of all rows */ 1964 m = M/size + ((M % size) > rank); 1965 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1966 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1967 rowners[0] = 0; 1968 for (i=2; i<=size; i++) { 1969 rowners[i] += rowners[i-1]; 1970 } 1971 rstart = rowners[rank]; 1972 rend = rowners[rank+1]; 1973 1974 /* distribute row lengths to all processors */ 1975 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1976 offlens = ourlens + (rend-rstart); 1977 if (!rank) { 1978 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1979 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1980 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1981 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1982 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1983 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1984 } else { 1985 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1986 } 1987 1988 if (!rank) { 1989 /* calculate the number of nonzeros on each processor */ 1990 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1991 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1992 for (i=0; i<size; i++) { 1993 for (j=rowners[i]; j< rowners[i+1]; j++) { 1994 procsnz[i] += rowlengths[j]; 1995 } 1996 } 1997 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1998 1999 /* determine max buffer needed and allocate it */ 2000 maxnz = 0; 2001 for (i=0; i<size; i++) { 2002 maxnz = PetscMax(maxnz,procsnz[i]); 2003 } 2004 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2005 2006 /* read in my part of the matrix column indices */ 2007 nz = procsnz[0]; 2008 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2009 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2010 2011 /* read in every one elses and ship off */ 2012 for (i=1; i<size; i++) { 2013 nz = procsnz[i]; 2014 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2015 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2016 } 2017 ierr = PetscFree(cols);CHKERRQ(ierr); 2018 } else { 2019 /* determine buffer space needed for message */ 2020 nz = 0; 2021 for (i=0; i<m; i++) { 2022 nz += ourlens[i]; 2023 } 2024 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2025 2026 /* receive message of column indices*/ 2027 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2028 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2029 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2030 } 2031 2032 /* determine column 
ownership if matrix is not square */ 2033 if (N != M) { 2034 n = N/size + ((N % size) > rank); 2035 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2036 cstart = cend - n; 2037 } else { 2038 cstart = rstart; 2039 cend = rend; 2040 n = cend - cstart; 2041 } 2042 2043 /* loop over local rows, determining number of off diagonal entries */ 2044 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2045 jj = 0; 2046 for (i=0; i<m; i++) { 2047 for (j=0; j<ourlens[i]; j++) { 2048 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2049 jj++; 2050 } 2051 } 2052 2053 /* create our matrix */ 2054 for (i=0; i<m; i++) { 2055 ourlens[i] -= offlens[i]; 2056 } 2057 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2058 ierr = MatSetType(A,type);CHKERRQ(ierr); 2059 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2060 2061 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2062 for (i=0; i<m; i++) { 2063 ourlens[i] += offlens[i]; 2064 } 2065 2066 if (!rank) { 2067 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2068 2069 /* read in my part of the matrix numerical values */ 2070 nz = procsnz[0]; 2071 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2072 2073 /* insert into matrix */ 2074 jj = rstart; 2075 smycols = mycols; 2076 svals = vals; 2077 for (i=0; i<m; i++) { 2078 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2079 smycols += ourlens[i]; 2080 svals += ourlens[i]; 2081 jj++; 2082 } 2083 2084 /* read in other processors and ship out */ 2085 for (i=1; i<size; i++) { 2086 nz = procsnz[i]; 2087 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2088 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2089 } 2090 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2091 } else { 2092 /* receive numeric values */ 2093 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2094 2095 /* receive message of values*/ 2096 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2097 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2098 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2099 2100 /* insert into matrix */ 2101 jj = rstart; 2102 smycols = mycols; 2103 svals = vals; 2104 for (i=0; i<m; i++) { 2105 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2106 smycols += ourlens[i]; 2107 svals += ourlens[i]; 2108 jj++; 2109 } 2110 } 2111 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2112 ierr = PetscFree(vals);CHKERRQ(ierr); 2113 ierr = PetscFree(mycols);CHKERRQ(ierr); 2114 ierr = PetscFree(rowners);CHKERRQ(ierr); 2115 2116 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2117 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2118 *newmat = A; 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2124 /* 2125 Not great since it makes two copies of the submatrix, first an SeqAIJ 2126 in local and then by concatenating the local matrices the end result. 
2127 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2128 */ 2129 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2130 { 2131 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2132 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2133 Mat *local,M,Mreuse; 2134 PetscScalar *vwork,*aa; 2135 MPI_Comm comm = mat->comm; 2136 Mat_SeqAIJ *aij; 2137 2138 2139 PetscFunctionBegin; 2140 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2141 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2142 2143 if (call == MAT_REUSE_MATRIX) { 2144 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2145 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2146 local = &Mreuse; 2147 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2148 } else { 2149 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2150 Mreuse = *local; 2151 ierr = PetscFree(local);CHKERRQ(ierr); 2152 } 2153 2154 /* 2155 m - number of local rows 2156 n - number of columns (same on all processors) 2157 rstart - first row in new global matrix generated 2158 */ 2159 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2160 if (call == MAT_INITIAL_MATRIX) { 2161 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2162 ii = aij->i; 2163 jj = aij->j; 2164 2165 /* 2166 Determine the number of non-zeros in the diagonal and off-diagonal 2167 portions of the matrix in order to do correct preallocation 2168 */ 2169 2170 /* first get start and end of "diagonal" columns */ 2171 if (csize == PETSC_DECIDE) { 2172 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2173 if (mglobal == n) { /* square matrix */ 2174 nlocal = m; 2175 } else { 2176 nlocal = n/size + ((n % size) > rank); 2177 } 2178 } else { 2179 nlocal = csize; 2180 } 2181 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2182 rstart = rend - nlocal; 2183 if (rank == size - 1 && rend != n) { 2184 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2185 } 2186 2187 /* next, compute all the lengths */ 2188 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2189 olens = dlens + m; 2190 for (i=0; i<m; i++) { 2191 jend = ii[i+1] - ii[i]; 2192 olen = 0; 2193 dlen = 0; 2194 for (j=0; j<jend; j++) { 2195 if (*jj < rstart || *jj >= rend) olen++; 2196 else dlen++; 2197 jj++; 2198 } 2199 olens[i] = olen; 2200 dlens[i] = dlen; 2201 } 2202 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2203 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2204 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2205 ierr = PetscFree(dlens);CHKERRQ(ierr); 2206 } else { 2207 int ml,nl; 2208 2209 M = *newmat; 2210 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2211 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2212 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2213 /* 2214 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2215 rather than the slower MatSetValues(). 
2216 */ 2217 M->was_assembled = PETSC_TRUE; 2218 M->assembled = PETSC_FALSE; 2219 } 2220 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2221 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2222 ii = aij->i; 2223 jj = aij->j; 2224 aa = aij->a; 2225 for (i=0; i<m; i++) { 2226 row = rstart + i; 2227 nz = ii[i+1] - ii[i]; 2228 cwork = jj; jj += nz; 2229 vwork = aa; aa += nz; 2230 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2231 } 2232 2233 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2234 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2235 *newmat = M; 2236 2237 /* save submatrix used in processor for next request */ 2238 if (call == MAT_INITIAL_MATRIX) { 2239 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2240 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2241 } 2242 2243 PetscFunctionReturn(0); 2244 } 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2248 /*@C 2249 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format 2250 (the default parallel PETSc format). For good matrix assembly performance 2251 the user should preallocate the matrix storage by setting the parameters 2252 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2253 performance can be increased by more than a factor of 50. 2254 2255 Collective on MPI_Comm 2256 2257 Input Parameters: 2258 + A - the matrix 2259 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2260 (same value is used for all local rows) 2261 . d_nnz - array containing the number of nonzeros in the various rows of the 2262 DIAGONAL portion of the local submatrix (possibly different for each row) 2263 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2264 The size of this array is equal to the number of local rows, i.e. 'm'. 2265 You must leave room for the diagonal entry even if it is zero. 2266 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2267 submatrix (same value is used for all local rows). 2268 - o_nnz - array containing the number of nonzeros in the various rows of the 2269 OFF-DIAGONAL portion of the local submatrix (possibly different for 2270 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2271 structure. The size of this array is equal to the number 2272 of local rows, i.e. 'm'. 2273 2274 The AIJ format (also called the Yale sparse matrix format or 2275 compressed row storage) is fully compatible with standard Fortran 77 2276 storage. That is, the stored row and column indices can begin at 2277 either one (as in Fortran) or zero. See the users manual for details. 2278 2279 The user MUST specify either the local or global matrix dimensions 2280 (possibly both). 2281 2282 The parallel matrix is partitioned such that the first m0 rows belong to 2283 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2284 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2285 2286 The DIAGONAL portion of the local submatrix of a processor can be defined 2287 as the submatrix which is obtained by extracting the part corresponding 2288 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2289 first row that belongs to the processor, and r2 is the last row belonging 2290 to this processor. This is a square mxm matrix. The remaining portion 2291 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2292 2293 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2294 2295 By default, this format uses inodes (identical nodes) when possible. 2296 We search for consecutive rows with the same nonzero structure, thereby 2297 reusing matrix information to achieve increased efficiency. 2298 2299 Options Database Keys: 2300 + -mat_aij_no_inode - Do not use inodes 2301 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2302 - -mat_aij_oneindex - Internally use indexing starting at 1 2303 rather than 0. Note that when calling MatSetValues(), 2304 the user still MUST index entries starting at 0! 2305 2306 Example usage: 2307 2308 Consider the following 8x8 matrix with 34 non-zero values that is 2309 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2310 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2311 as follows: 2312 2313 .vb 2314 1 2 0 | 0 3 0 | 0 4 2315 Proc0 0 5 6 | 7 0 0 | 8 0 2316 9 0 10 | 11 0 0 | 12 0 2317 ------------------------------------- 2318 13 0 14 | 15 16 17 | 0 0 2319 Proc1 0 18 0 | 19 20 21 | 0 0 2320 0 0 0 | 22 23 0 | 24 0 2321 ------------------------------------- 2322 Proc2 25 26 27 | 0 0 28 | 29 0 2323 30 0 0 | 31 32 33 | 0 34 2324 .ve 2325 2326 This can be represented as a collection of submatrices as: 2327 2328 .vb 2329 A B C 2330 D E F 2331 G H I 2332 .ve 2333 2334 Here the submatrices A,B,C are owned by proc0, D,E,F are 2335 owned by proc1, and G,H,I are owned by proc2. 2336 2337 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2338 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2339 The 'M','N' parameters are 8,8, and have the same values on all procs. 2340 2341 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2342 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2343 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2344 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2345 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2346 matrix and [DF] as another SeqAIJ matrix. 2347 2348 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2349 allocated for every row of the local diagonal submatrix, and o_nz 2350 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2351 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2352 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2353 In this case, the values of d_nz,o_nz are: 2354 .vb 2355 proc0 : d_nz = 2, o_nz = 2 2356 proc1 : d_nz = 3, o_nz = 2 2357 proc2 : d_nz = 1, o_nz = 4 2358 .ve 2359 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2360 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2361 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2362 34 values. 2363 2364 When the d_nnz, o_nnz parameters are specified, the storage is specified 2365 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2366 In the above case the values for d_nnz,o_nnz are: 2367 .vb 2368 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2369 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2370 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2371 .ve 2372 Here the space allocated is the sum of all the above values, i.e. 34, and 2373 hence the preallocation is perfect.
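
   As a minimal usage sketch (variable names are illustrative and error checking with CHKERRQ()
   is omitted), proc1 in the example above could preallocate its local part with exactly the
   d_nnz/o_nnz values listed:
.vb
      Mat A;
      int d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);      /* local size 3x3, global size 8x8 on proc1 */
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve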
2374 2375 Level: intermediate 2376 2377 .keywords: matrix, aij, compressed row, sparse, parallel 2378 2379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2380 @*/ 2381 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2382 { 2383 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2384 2385 PetscFunctionBegin; 2386 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2387 if (f) { 2388 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatCreateMPIAIJ" 2395 /*@C 2396 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2397 (the default parallel PETSc format). For good matrix assembly performance 2398 the user should preallocate the matrix storage by setting the parameters 2399 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2400 performance can be increased by more than a factor of 50. 2401 2402 Collective on MPI_Comm 2403 2404 Input Parameters: 2405 + comm - MPI communicator 2406 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2407 This value should be the same as the local size used in creating the 2408 y vector for the matrix-vector product y = Ax. 2409 . n - This value should be the same as the local size used in creating the 2410 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 2411 calculated if N is given). For square matrices n is almost always m. 2412 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2413 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2414 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2415 (same value is used for all local rows) 2416 . d_nnz - array containing the number of nonzeros in the various rows of the 2417 DIAGONAL portion of the local submatrix (possibly different for each row) 2418 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2419 The size of this array is equal to the number of local rows, i.e. 'm'. 2420 You must leave room for the diagonal entry even if it is zero. 2421 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2422 submatrix (same value is used for all local rows). 2423 - o_nnz - array containing the number of nonzeros in the various rows of the 2424 OFF-DIAGONAL portion of the local submatrix (possibly different for 2425 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2426 structure. The size of this array is equal to the number 2427 of local rows, i.e. 'm'. 2428 2429 Output Parameter: 2430 . A - the matrix 2431 2432 Notes: 2433 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2434 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2435 storage requirements for this matrix. 2436 2437 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2438 processor then it must be used on all processors that share the object for 2439 that argument. 2440 2441 The AIJ format (also called the Yale sparse matrix format or 2442 compressed row storage) is fully compatible with standard Fortran 77 2443 storage. That is, the stored row and column indices can begin at 2444 either one (as in Fortran) or zero. See the users manual for details. 2445 2446 The user MUST specify either the local or global matrix dimensions 2447 (possibly both).
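
   For instance, a minimal sketch (error checking omitted) of creating an 8x8 parallel matrix,
   letting PETSc choose the local sizes and preallocating 5 diagonal and 2 off-diagonal nonzeros
   per row, is
.vb
      Mat A;
      MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve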
2448 2449 The parallel matrix is partitioned such that the first m0 rows belong to 2450 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2451 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2452 2453 The DIAGONAL portion of the local submatrix of a processor can be defined 2454 as the submatrix which is obtained by extracting the part corresponding 2455 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2456 first row that belongs to the processor, and r2 is the last row belonging 2457 to this processor. This is a square mxm matrix. The remaining portion 2458 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2459 2460 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2461 2462 When calling this routine with a single process communicator, a matrix of 2463 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2464 type of communicator, use the construction mechanism: 2465 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2466 2467 By default, this format uses inodes (identical nodes) when possible. 2468 We search for consecutive rows with the same nonzero structure, thereby 2469 reusing matrix information to achieve increased efficiency. 2470 2471 Options Database Keys: 2472 + -mat_aij_no_inode - Do not use inodes 2473 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2474 - -mat_aij_oneindex - Internally use indexing starting at 1 2475 rather than 0. Note that when calling MatSetValues(), 2476 the user still MUST index entries starting at 0! 2477 2478 2479 Example usage: 2480 2481 Consider the following 8x8 matrix with 34 non-zero values that is 2482 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2483 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2484 as follows: 2485 2486 .vb 2487 1 2 0 | 0 3 0 | 0 4 2488 Proc0 0 5 6 | 7 0 0 | 8 0 2489 9 0 10 | 11 0 0 | 12 0 2490 ------------------------------------- 2491 13 0 14 | 15 16 17 | 0 0 2492 Proc1 0 18 0 | 19 20 21 | 0 0 2493 0 0 0 | 22 23 0 | 24 0 2494 ------------------------------------- 2495 Proc2 25 26 27 | 0 0 28 | 29 0 2496 30 0 0 | 31 32 33 | 0 34 2497 .ve 2498 2499 This can be represented as a collection of submatrices as: 2500 2501 .vb 2502 A B C 2503 D E F 2504 G H I 2505 .ve 2506 2507 Here the submatrices A,B,C are owned by proc0, D,E,F are 2508 owned by proc1, and G,H,I are owned by proc2. 2509 2510 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2511 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2512 The 'M','N' parameters are 8,8, and have the same values on all procs. 2513 2514 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2515 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2516 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2517 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2518 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2519 matrix and [DF] as another SeqAIJ matrix. 2520 2521 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2522 allocated for every row of the local diagonal submatrix, and o_nz 2523 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2524 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2525 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2526 In this case, the values of d_nz,o_nz are: 2527 .vb 2528 proc0 : d_nz = 2, o_nz = 2 2529 proc1 : d_nz = 3, o_nz = 2 2530 proc2 : d_nz = 1, o_nz = 4 2531 .ve 2532 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2533 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2534 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2535 34 values. 2536 2537 When the d_nnz, o_nnz parameters are specified, the storage is specified 2538 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2539 In the above case the values for d_nnz,o_nnz are: 2540 .vb 2541 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2542 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2543 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2544 .ve 2545 Here the space allocated is the sum of all the above values, i.e. 34, and 2546 hence the preallocation is perfect. 2547 2548 Level: intermediate 2549 2550 .keywords: matrix, aij, compressed row, sparse, parallel 2551 2552 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2553 @*/ 2554 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2555 { 2556 int ierr,size; 2557 2558 PetscFunctionBegin; 2559 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2560 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2561 if (size > 1) { 2562 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2563 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2564 } else { 2565 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2566 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2567 } 2568 PetscFunctionReturn(0); 2569 } 2570 2571 #undef __FUNCT__ 2572 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2573 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2574 { 2575 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2576 PetscFunctionBegin; 2577 *Ad = a->A; 2578 *Ao = a->B; 2579 *colmap = a->garray; 2580 PetscFunctionReturn(0); 2581 } 2582 2583 #undef __FUNCT__ 2584 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2585 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2586 { 2587 int ierr,i; 2588 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2589 2590 PetscFunctionBegin; 2591 if (coloring->ctype == IS_COLORING_LOCAL) { 2592 ISColoringValue *allcolors,*colors; 2593 ISColoring ocoloring; 2594 2595 /* set coloring for diagonal portion */ 2596 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2597 2598 /* set coloring for off-diagonal portion */ 2599 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2600 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2601 for (i=0; i<a->B->n; i++) { 2602 colors[i] = allcolors[a->garray[i]]; 2603 } 2604 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2605 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2606 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2607 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2608 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2609 ISColoringValue *colors; 2610 int *larray; 2611 ISColoring ocoloring; 2612 2613 /* set coloring for diagonal portion */ 2614 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2615 for (i=0; i<a->A->n; i++) { 2616 larray[i] = i + a->cstart; 2617 } 2618 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2619 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2620 for
(i=0; i<a->A->n; i++) { 2621 colors[i] = coloring->colors[larray[i]]; 2622 } 2623 ierr = PetscFree(larray);CHKERRQ(ierr); 2624 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2625 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2626 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2627 2628 /* set coloring for off-diagonal portion */ 2629 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2630 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2631 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2632 for (i=0; i<a->B->n; i++) { 2633 colors[i] = coloring->colors[larray[i]]; 2634 } 2635 ierr = PetscFree(larray);CHKERRQ(ierr); 2636 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2637 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2638 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2639 } else { 2640 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype); 2641 } 2642 2643 PetscFunctionReturn(0); 2644 } 2645 2646 #undef __FUNCT__ 2647 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2648 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2649 { 2650 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2651 int ierr; 2652 2653 PetscFunctionBegin; 2654 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2655 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2656 PetscFunctionReturn(0); 2657 } 2658 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2661 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2662 { 2663 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2664 int ierr; 2665 2666 PetscFunctionBegin; 2667 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2668 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671 2672 #undef __FUNCT__ 2673 #define __FUNCT__ "MatMerge" 2674 /*@C 2675 MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential 2676 matrices from each processor 2677 2678 Collective on MPI_Comm 2679 2680 Input Parameters: 2681 + comm - the communicator the parallel matrix will live on 2682 - inmat - the input sequential matrix (each process provides its own) 2683 2684 Output Parameter: 2685 . outmat - the parallel matrix generated 2686 2687 Level: advanced 2688 2689 Notes: The number of columns of the matrix from EACH of the separate processors 2690 MUST be the same.
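
   A minimal calling sketch (variable names are illustrative; note that MatMerge() destroys the
   input matrix, as the implementation below shows):
.vb
      Mat seqmat,parmat;
      /* ... each process assembles its own sequential matrix seqmat ... */
      MatMerge(PETSC_COMM_WORLD,seqmat,&parmat);
.ve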
2691 2692 @*/ 2693 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2694 { 2695 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2696 PetscScalar *values; 2697 PetscMap columnmap,rowmap; 2698 2699 PetscFunctionBegin; 2700 2701 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2702 2703 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2704 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2705 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2706 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2707 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2708 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2709 2710 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2711 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2712 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2713 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2714 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2715 2716 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2717 for (i=0;i<m;i++) { 2718 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2719 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2720 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2721 } 2722 /* This routine will ONLY return MPIAIJ type matrix */ 2723 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2724 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2725 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2726 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2727 2728 for (i=0;i<m;i++) { 2729 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2730 I = i + rstart; 2731 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2732 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2733 } 2734 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2735 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2736 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2737 2738 PetscFunctionReturn(0); 2739 } 2740 2741 #undef __FUNCT__ 2742 #define __FUNCT__ "MatFileSplit" 2743 int MatFileSplit(Mat A,char *outfile) 2744 { 2745 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2746 PetscViewer out; 2747 char *name; 2748 Mat B; 2749 PetscScalar *values; 2750 2751 PetscFunctionBegin; 2752 2753 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2754 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2755 /* Should this be the type of the diagonal block of A? 
*/ 2756 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2757 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2758 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2759 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2760 for (i=0;i<m;i++) { 2761 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2762 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2763 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2764 } 2765 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2766 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2767 2768 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2769 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2770 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2771 sprintf(name,"%s.%d",outfile,rank); 2772 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2773 ierr = PetscFree(name); 2774 ierr = MatView(B,out);CHKERRQ(ierr); 2775 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2776 ierr = MatDestroy(B);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779
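
/*
   Usage sketch for MatFileSplit() (the file name below is illustrative): each process writes its
   local rows, repacked as a sequential matrix, to its own binary file named <outfile>.<rank>, e.g.

      ierr = MatFileSplit(A,"splitmat");CHKERRQ(ierr);   // writes splitmat.0, splitmat.1, ...
*/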