#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order-N integer array), but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
a->maxnz += CHUNKSIZE; \ 95 a->reallocs++; \ 96 } \ 97 N = nrow++ - 1; a->nz++; \ 98 /* shift up all the later entries in this row */ \ 99 for (ii=N; ii>=_i; ii--) { \ 100 rp[ii+1] = rp[ii]; \ 101 ap[ii+1] = ap[ii]; \ 102 } \ 103 rp[_i] = col1; \ 104 ap[_i] = value; \ 105 a_noinsert: ; \ 106 ailen[row] = nrow; \ 107 } 108 109 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 110 { \ 111 \ 112 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 113 rmax = bimax[row]; nrow = bilen[row]; \ 114 col1 = col - shift; \ 115 \ 116 low = 0; high = nrow; \ 117 while (high-low > 5) { \ 118 t = (low+high)/2; \ 119 if (rp[t] > col) high = t; \ 120 else low = t; \ 121 } \ 122 for (_i=low; _i<high; _i++) { \ 123 if (rp[_i] > col1) break; \ 124 if (rp[_i] == col1) { \ 125 if (addv == ADD_VALUES) ap[_i] += value; \ 126 else ap[_i] = value; \ 127 goto b_noinsert; \ 128 } \ 129 } \ 130 if (nonew == 1) goto b_noinsert; \ 131 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 132 if (nrow >= rmax) { \ 133 /* there is no extra room in row, therefore enlarge */ \ 134 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 135 PetscScalar *new_a; \ 136 \ 137 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 138 \ 139 /* malloc new storage space */ \ 140 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 141 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 142 new_j = (int*)(new_a + new_nz); \ 143 new_i = new_j + new_nz; \ 144 \ 145 /* copy over old data into new slots */ \ 146 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 147 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 148 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 149 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 150 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 151 len*sizeof(int));CHKERRQ(ierr); \ 152 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 154 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 155 /* free up old matrix storage */ \ 156 \ 157 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 158 if (!b->singlemalloc) { \ 159 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 160 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 161 } \ 162 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 163 b->singlemalloc = PETSC_TRUE; \ 164 \ 165 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 166 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 167 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 168 b->maxnz += CHUNKSIZE; \ 169 b->reallocs++; \ 170 } \ 171 N = nrow++ - 1; b->nz++; \ 172 /* shift up all the later entries in this row */ \ 173 for (ii=N; ii>=_i; ii--) { \ 174 rp[ii+1] = rp[ii]; \ 175 ap[ii+1] = ap[ii]; \ 176 } \ 177 rp[_i] = col1; \ 178 ap[_i] = value; \ 179 b_noinsert: ; \ 180 bilen[row] = nrow; \ 181 } 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "MatSetValues_MPIAIJ" 185 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscScalar value; 189 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 190 int cstart = aij->cstart,cend = aij->cend,row,col; 191 PetscTruth roworiented = aij->roworiented; 192 193 /* Some Variables required in the macro */ 194 Mat A = 
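/*
   A condensed, illustrative sketch of the search-and-insert step performed by the two
   macros above (MatSetValues_SeqAIJ_A_Private / _B_Private).  The row is kept in
   ascending column order; a short binary search narrows the window, a linear scan
   finds the slot, and the tail of the row is shifted up by one.  The names cols, vals,
   nrow, rmax and the helper enlarge_row_by_CHUNKSIZE() are hypothetical, used only here:

      int lo = 0, hi = nrow, t, k;
      while (hi - lo > 5) {                      binary search narrows the window
        t = (lo + hi)/2;
        if (cols[t] > col) hi = t; else lo = t;
      }
      for (k = lo; k < hi; k++) {
        if (cols[k] == col) { vals[k] = (addv == ADD_VALUES) ? vals[k] + value : value; return; }
        if (cols[k] > col) break;
      }
      if (nrow >= rmax) enlarge_row_by_CHUNKSIZE();            hypothetical helper
      memmove(cols+k+1,cols+k,(nrow-k)*sizeof(int));           shift the tail up one slot
      memmove(vals+k+1,vals+k,(nrow-k)*sizeof(PetscScalar));
      cols[k] = col; vals[k] = value; nrow++;

   The real macros additionally bump the CSR row pointers of every later row when a row
   grows, which is why a CHUNKSIZE reallocation touches the whole sequential block.
*/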
aij->A; 195 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 196 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 197 PetscScalar *aa = a->a; 198 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 199 Mat B = aij->B; 200 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 201 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 202 PetscScalar *ba = b->a; 203 204 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 205 int nonew = a->nonew,shift=0; 206 PetscScalar *ap; 207 208 PetscFunctionBegin; 209 for (i=0; i<m; i++) { 210 if (im[i] < 0) continue; 211 #if defined(PETSC_USE_BOPT_g) 212 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 213 #endif 214 if (im[i] >= rstart && im[i] < rend) { 215 row = im[i] - rstart; 216 for (j=0; j<n; j++) { 217 if (in[j] >= cstart && in[j] < cend){ 218 col = in[j] - cstart; 219 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 220 if (ignorezeroentries && value == 0.0) continue; 221 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 222 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 223 } else if (in[j] < 0) continue; 224 #if defined(PETSC_USE_BOPT_g) 225 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 226 #endif 227 else { 228 if (mat->was_assembled) { 229 if (!aij->colmap) { 230 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 231 } 232 #if defined (PETSC_USE_CTABLE) 233 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 234 col--; 235 #else 236 col = aij->colmap[in[j]] - 1; 237 #endif 238 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 239 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 240 col = in[j]; 241 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 242 B = aij->B; 243 b = (Mat_SeqAIJ*)B->data; 244 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 245 ba = b->a; 246 } 247 } else col = in[j]; 248 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 249 if (ignorezeroentries && value == 0.0) continue; 250 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 251 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 252 } 253 } 254 } else { 255 if (!aij->donotstash) { 256 if (roworiented) { 257 if (ignorezeroentries && v[i*n] == 0.0) continue; 258 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 259 } else { 260 if (ignorezeroentries && v[i] == 0.0) continue; 261 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 262 } 263 } 264 } 265 } 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatGetValues_MPIAIJ" 271 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 274 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 275 int cstart = aij->cstart,cend = aij->cend,row,col; 276 277 PetscFunctionBegin; 278 for (i=0; i<m; i++) { 279 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 280 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 281 if (idxm[i] >= rstart && idxm[i] < rend) { 282 row = idxm[i] - rstart; 283 for (j=0; j<n; j++) { 284 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 285 if (idxn[j] >= mat->N) 
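/*
   Illustrative sketch of the colmap translation used by MatSetValues_MPIAIJ() above
   (and again below in MatGetValues_MPIAIJ()): a global column index is mapped to a
   local column of the off-diagonal block B through aij->colmap, which is the inverse
   of aij->garray and is built lazily by CreateColmap_MPIAIJ_Private().  gcol and lcol
   are hypothetical names used only in this sketch:

      int lcol;
   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                          entries are stored shifted by one so 0 means "absent"
   #else
      lcol = aij->colmap[gcol] - 1;    same +1/-1 convention for the plain array variant
   #endif
      if (lcol < 0) {
        the global column gcol has no allocated slot in B on this process
      }
*/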
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 286 if (idxn[j] >= cstart && idxn[j] < cend){ 287 col = idxn[j] - cstart; 288 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 289 } else { 290 if (!aij->colmap) { 291 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 292 } 293 #if defined (PETSC_USE_CTABLE) 294 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 295 col --; 296 #else 297 col = aij->colmap[idxn[j]] - 1; 298 #endif 299 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 300 else { 301 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 302 } 303 } 304 } 305 } else { 306 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 307 } 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 314 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 315 { 316 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 317 int ierr,nstash,reallocs; 318 InsertMode addv; 319 320 PetscFunctionBegin; 321 if (aij->donotstash) { 322 PetscFunctionReturn(0); 323 } 324 325 /* make sure all processors are either in INSERTMODE or ADDMODE */ 326 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 327 if (addv == (ADD_VALUES|INSERT_VALUES)) { 328 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 329 } 330 mat->insertmode = addv; /* in case this processor had no cache */ 331 332 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 333 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 334 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 335 PetscFunctionReturn(0); 336 } 337 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 341 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 342 { 343 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 344 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 345 int i,j,rstart,ncols,n,ierr,flg; 346 int *row,*col,other_disassembled; 347 PetscScalar *val; 348 InsertMode addv = mat->insertmode; 349 350 PetscFunctionBegin; 351 if (!aij->donotstash) { 352 while (1) { 353 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 354 if (!flg) break; 355 356 for (i=0; i<n;) { 357 /* Now identify the consecutive vals belonging to the same row */ 358 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 359 if (j < n) ncols = j-i; 360 else ncols = n-i; 361 /* Now assemble all these values with a single function call */ 362 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 363 i = j; 364 } 365 } 366 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 367 } 368 369 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 370 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 371 372 /* determine if any processor has disassembled, if so we must 373 also disassemble ourselfs, in order that we may reassemble. 
*/ 374 /* 375 if nonzero structure of submatrix B cannot change then we know that 376 no processor disassembled thus we can skip this stuff 377 */ 378 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 379 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 380 if (mat->was_assembled && !other_disassembled) { 381 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 382 } 383 } 384 385 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 386 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 389 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 390 391 if (aij->rowvalues) { 392 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 393 aij->rowvalues = 0; 394 } 395 396 /* used by MatAXPY() */ 397 a->xtoy = 0; b->xtoy = 0; 398 a->XtoY = 0; b->XtoY = 0; 399 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 405 int MatZeroEntries_MPIAIJ(Mat A) 406 { 407 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 408 int ierr; 409 410 PetscFunctionBegin; 411 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 412 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatZeroRows_MPIAIJ" 418 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 419 { 420 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 421 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 422 int *nprocs,j,idx,nsends,row; 423 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 424 int *rvalues,tag = A->tag,count,base,slen,n,*source; 425 int *lens,imdex,*lrows,*values,rstart=l->rstart; 426 MPI_Comm comm = A->comm; 427 MPI_Request *send_waits,*recv_waits; 428 MPI_Status recv_status,*send_status; 429 IS istmp; 430 PetscTruth found; 431 432 PetscFunctionBegin; 433 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 434 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 435 436 /* first count number of contributors to each processor */ 437 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 438 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 439 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 440 for (i=0; i<N; i++) { 441 idx = rows[i]; 442 found = PETSC_FALSE; 443 for (j=0; j<size; j++) { 444 if (idx >= owners[j] && idx < owners[j+1]) { 445 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 446 } 447 } 448 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 449 } 450 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 451 452 /* inform other processors of number of messages and max length*/ 453 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 454 455 /* post receives: */ 456 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 457 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 458 for (i=0; i<nrecvs; i++) { 459 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 460 } 461 462 /* do sends: 463 1) starts[i] gives the starting index in svalues for stuff going to 464 the ith processor 465 */ 466 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 467 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 468 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 469 starts[0] = 0; 470 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 471 for (i=0; i<N; i++) { 472 svalues[starts[owner[i]]++] 
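/*
   Typical calling sequence that relies on the stash-based assembly implemented above
   (illustrative only; comm, nlocal, N and the entry loop are placeholders):

      Mat A;
      ierr = MatCreate(comm,nlocal,nlocal,N,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      for each locally computed entry (i,j,v), even if row i is owned elsewhere:
        ierr = MatSetValues(A,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   off-process rows leave via the stash
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     received rows are re-inserted locally

   MatAssemblyEnd_MPIAIJ() drains the stash with MatStashScatterGetMesg_Private() and
   feeds each received row back through MatSetValues_MPIAIJ(), so the owning process
   stores the entry exactly as if it had been set locally.
*/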
                          = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows.  If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, trigger many mallocs, and slow things down.
526 527 Contributed by: Mathew Knepley 528 */ 529 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 530 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 531 if (diag && (l->A->M == l->A->N)) { 532 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 533 } else if (diag) { 534 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 535 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 536 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 537 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 538 } 539 for (i = 0; i < slen; i++) { 540 row = lrows[i] + rstart; 541 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 542 } 543 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 544 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 } else { 546 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 547 } 548 ierr = ISDestroy(istmp);CHKERRQ(ierr); 549 ierr = PetscFree(lrows);CHKERRQ(ierr); 550 551 /* wait on sends */ 552 if (nsends) { 553 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 554 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 555 ierr = PetscFree(send_status);CHKERRQ(ierr); 556 } 557 ierr = PetscFree(send_waits);CHKERRQ(ierr); 558 ierr = PetscFree(svalues);CHKERRQ(ierr); 559 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatMult_MPIAIJ" 565 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 566 { 567 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 568 int ierr,nt; 569 570 PetscFunctionBegin; 571 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 572 if (nt != A->n) { 573 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 574 } 575 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 576 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 577 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 578 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatMultAdd_MPIAIJ" 584 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 585 { 586 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 587 int ierr; 588 589 PetscFunctionBegin; 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 599 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 int ierr; 603 604 PetscFunctionBegin; 605 /* do nondiagonal part */ 606 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 607 /* send it on its way */ 608 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 609 /* do local part */ 610 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 611 /* receive remote parts: note this assumes the values are not actually */ 612 /* inserted in yy until the next line, which is true for my implementation*/ 613 /* but is not perhaps always true. 
*/ 614 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 EXTERN_C_BEGIN 619 #undef __FUNCT__ 620 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 621 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 622 { 623 MPI_Comm comm; 624 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 625 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 626 IS Me,Notme; 627 int M,N,first,last,*notme,ntids,i, ierr; 628 629 PetscFunctionBegin; 630 631 /* Easy test: symmetric diagonal block */ 632 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 633 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 634 if (!*f) PetscFunctionReturn(0); 635 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 636 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 637 if (ntids==1) PetscFunctionReturn(0); 638 639 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 640 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 641 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 642 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 643 for (i=0; i<first; i++) notme[i] = i; 644 for (i=last; i<M; i++) notme[i-last+first] = i; 645 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 646 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 647 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 648 Aoff = Aoffs[0]; 649 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 650 Boff = Boffs[0]; 651 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 652 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 653 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 654 ierr = ISDestroy(Me);CHKERRQ(ierr); 655 ierr = ISDestroy(Notme);CHKERRQ(ierr); 656 657 PetscFunctionReturn(0); 658 } 659 EXTERN_C_END 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 663 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 664 { 665 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 666 int ierr; 667 668 PetscFunctionBegin; 669 /* do nondiagonal part */ 670 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 671 /* send it on its way */ 672 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 673 /* do local part */ 674 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 675 /* receive remote parts: note this assumes the values are not actually */ 676 /* inserted in yy until the next line, which is true for my implementation*/ 677 /* but is not perhaps always true. 
*/ 678 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 /* 683 This only works correctly for square matrices where the subblock A->A is the 684 diagonal block 685 */ 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 688 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 689 { 690 int ierr; 691 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 692 693 PetscFunctionBegin; 694 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 695 if (a->rstart != a->cstart || a->rend != a->cend) { 696 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 697 } 698 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatScale_MPIAIJ" 704 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 705 { 706 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 707 int ierr; 708 709 PetscFunctionBegin; 710 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 711 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatDestroy_MPIAIJ" 717 int MatDestroy_MPIAIJ(Mat mat) 718 { 719 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 720 int ierr; 721 722 PetscFunctionBegin; 723 #if defined(PETSC_USE_LOG) 724 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 725 #endif 726 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 727 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 728 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 729 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 730 #if defined (PETSC_USE_CTABLE) 731 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 732 #else 733 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 734 #endif 735 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 736 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 737 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 738 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 739 ierr = PetscFree(aij);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatView_MPIAIJ_Binary" 745 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 746 { 747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 748 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 749 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 750 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 751 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->M; 761 header[2] = mat->N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->m; 767 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 768 for (i=1; i<size; i++) { 769 rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 rlen = mat->m; 774 } 775 776 /* load up the local row counts */ 777 ierr = 
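/*
   Schematic view of MatMult_MPIAIJ() above: the product is y = A_d*x + B_o*x_ghost,
   where A_d is the local diagonal block, B_o the off-diagonal block, and x_ghost the
   needed off-process entries of x gathered into a->lvec.  The scatter is started
   before the local product so communication overlaps computation:

      VecScatterBegin(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);   start gathering ghost values
      y = A_d * x;                                                   local work hides the latency
      VecScatterEnd(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);     ghost values now valid
      y = y + B_o * lvec;                                            off-diagonal contribution

   MatMultTranspose_MPIAIJ() runs the same pattern in reverse (SCATTER_REVERSE with
   ADD_VALUES), which is why it notes the assumption that scattered values are not
   folded into yy until VecScatterEnd() completes.
*/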
PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest amount needed by any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
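  /*
     The stream written above by rank 0 has the layout

        int         header[4] = {MAT_FILE_COOKIE, M, N, total_nz};
        int         row_lengths[M];           nonzeros per global row
        int         column_indices[total_nz]; global columns, row by row, ascending within each row
        PetscScalar values[total_nz];         matching numerical values

     so a reader can rebuild the matrix with three sequential reads after the header.
     A hedged sketch of reading the header back; the PetscBinaryOpen/PetscBinaryRead
     calls and the PETSC_BINARY_RDONLY flag are assumptions about the contemporaneous
     binary API, and MatLoad() is the supported way to read such a file:

        int fd,header[4],ierr;
        ierr = PetscBinaryOpen("matrix.dat",PETSC_BINARY_RDONLY,&fd);CHKERRQ(ierr);
        ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
        if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix file");
  */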
864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 int ierr,rank = aij->rank,size = aij->size; 872 PetscTruth isdraw,iascii,flg,isbinary; 873 PetscViewer sviewer; 874 PetscViewerFormat format; 875 876 PetscFunctionBegin; 877 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 880 if (iascii) { 881 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 882 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 883 MatInfo info; 884 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 885 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 886 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 887 if (flg) { 888 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 889 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 890 } else { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 892 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 893 } 894 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 895 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 896 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 898 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 899 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } else if (format == PETSC_VIEWER_ASCII_INFO) { 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 904 PetscFunctionReturn(0); 905 } 906 } else if (isbinary) { 907 if (size == 1) { 908 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 909 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 910 } else { 911 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (isdraw) { 915 PetscDraw draw; 916 PetscTruth isnull; 917 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 918 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 919 } 920 921 if (size == 1) { 922 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 923 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 924 } else { 925 /* assemble the entire matrix onto first processor. 
*/ 926 Mat A; 927 Mat_SeqAIJ *Aloc; 928 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 929 PetscScalar *a; 930 931 if (!rank) { 932 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 933 } else { 934 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 935 } 936 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 937 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 938 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 939 PetscLogObjectParent(mat,A); 940 941 /* copy over the A part */ 942 Aloc = (Mat_SeqAIJ*)aij->A->data; 943 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 944 row = aij->rstart; 945 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 946 for (i=0; i<m; i++) { 947 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 948 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 949 } 950 aj = Aloc->j; 951 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 952 953 /* copy over the B part */ 954 Aloc = (Mat_SeqAIJ*)aij->B->data; 955 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = aij->rstart; 957 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 958 ct = cols; 959 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 960 for (i=0; i<m; i++) { 961 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 962 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 963 } 964 ierr = PetscFree(ct);CHKERRQ(ierr); 965 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 /* 968 Everyone has to call to draw the matrix since the graphics waits are 969 synchronized across all processors that share the PetscDraw object 970 */ 971 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 972 if (!rank) { 973 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 974 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 975 } 976 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 977 ierr = MatDestroy(A);CHKERRQ(ierr); 978 } 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "MatView_MPIAIJ" 984 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 985 { 986 int ierr; 987 PetscTruth iascii,isdraw,issocket,isbinary; 988 989 PetscFunctionBegin; 990 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 992 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 993 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 994 if (iascii || isdraw || isbinary || issocket) { 995 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 996 } else { 997 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 998 } 999 PetscFunctionReturn(0); 1000 } 1001 1002 1003 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatRelax_MPIAIJ" 1006 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1007 { 1008 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1009 int ierr; 1010 Vec bb1; 1011 PetscScalar mone=-1.0; 1012 1013 PetscFunctionBegin; 1014 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1015 1016 ierr = 
VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1017 1018 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1019 if (flag & SOR_ZERO_INITIAL_GUESS) { 1020 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1021 its--; 1022 } 1023 1024 while (its--) { 1025 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1026 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1027 1028 /* update rhs: bb1 = bb - B*x */ 1029 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1030 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1031 1032 /* local sweep */ 1033 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1034 CHKERRQ(ierr); 1035 } 1036 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1037 if (flag & SOR_ZERO_INITIAL_GUESS) { 1038 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1039 its--; 1040 } 1041 while (its--) { 1042 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1043 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1044 1045 /* update rhs: bb1 = bb - B*x */ 1046 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1047 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1048 1049 /* local sweep */ 1050 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1051 CHKERRQ(ierr); 1052 } 1053 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1054 if (flag & SOR_ZERO_INITIAL_GUESS) { 1055 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1056 its--; 1057 } 1058 while (its--) { 1059 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1060 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1061 1062 /* update rhs: bb1 = bb - B*x */ 1063 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1064 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1065 1066 /* local sweep */ 1067 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1068 CHKERRQ(ierr); 1069 } 1070 } else { 1071 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1072 } 1073 1074 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1080 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1081 { 1082 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1083 Mat A = mat->A,B = mat->B; 1084 int ierr; 1085 PetscReal isend[5],irecv[5]; 1086 1087 PetscFunctionBegin; 1088 info->block_size = 1.0; 1089 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1090 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1091 isend[3] = info->memory; isend[4] = info->mallocs; 1092 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1093 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1094 isend[3] += info->memory; isend[4] += info->mallocs; 1095 if (flag == MAT_LOCAL) { 1096 info->nz_used = isend[0]; 1097 info->nz_allocated = isend[1]; 1098 info->nz_unneeded = isend[2]; 1099 info->memory = isend[3]; 1100 info->mallocs = isend[4]; 1101 } else if (flag == MAT_GLOBAL_MAX) { 1102 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1103 info->nz_used 
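/*
   In matrix terms, each outer iteration of MatRelax_MPIAIJ() above is processor-local
   SOR applied to the diagonal block, with the coupling to other processes refreshed
   once per outer sweep.  With A_d the diagonal block, B_o the off-diagonal block and
   x_ghost the scattered entries held in lvec:

      bb1 = bb - B_o * x_ghost          (coded as lvec = -lvec; bb1 = B_o*lvec + bb)
      x   = SOR(A_d, bb1, omega, lits)  (forward, backward, or symmetric local sweep)

   So the parallel "SOR" is block Jacobi between processes with SOR inside each block,
   which is why only the SOR_LOCAL_* flags are supported and a true parallel sweep
   raises PETSC_ERR_SUP.
*/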
= irecv[0]; 1104 info->nz_allocated = irecv[1]; 1105 info->nz_unneeded = irecv[2]; 1106 info->memory = irecv[3]; 1107 info->mallocs = irecv[4]; 1108 } else if (flag == MAT_GLOBAL_SUM) { 1109 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1110 info->nz_used = irecv[0]; 1111 info->nz_allocated = irecv[1]; 1112 info->nz_unneeded = irecv[2]; 1113 info->memory = irecv[3]; 1114 info->mallocs = irecv[4]; 1115 } 1116 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1117 info->fill_ratio_needed = 0; 1118 info->factor_mallocs = 0; 1119 info->rows_global = (double)matin->M; 1120 info->columns_global = (double)matin->N; 1121 info->rows_local = (double)matin->m; 1122 info->columns_local = (double)matin->N; 1123 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatSetOption_MPIAIJ" 1129 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1130 { 1131 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1132 int ierr; 1133 1134 PetscFunctionBegin; 1135 switch (op) { 1136 case MAT_NO_NEW_NONZERO_LOCATIONS: 1137 case MAT_YES_NEW_NONZERO_LOCATIONS: 1138 case MAT_COLUMNS_UNSORTED: 1139 case MAT_COLUMNS_SORTED: 1140 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1141 case MAT_KEEP_ZEROED_ROWS: 1142 case MAT_NEW_NONZERO_LOCATION_ERR: 1143 case MAT_USE_INODES: 1144 case MAT_DO_NOT_USE_INODES: 1145 case MAT_IGNORE_ZERO_ENTRIES: 1146 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1147 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1148 break; 1149 case MAT_ROW_ORIENTED: 1150 a->roworiented = PETSC_TRUE; 1151 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1152 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1153 break; 1154 case MAT_ROWS_SORTED: 1155 case MAT_ROWS_UNSORTED: 1156 case MAT_YES_NEW_DIAGONALS: 1157 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1158 break; 1159 case MAT_COLUMN_ORIENTED: 1160 a->roworiented = PETSC_FALSE; 1161 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1162 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1163 break; 1164 case MAT_IGNORE_OFF_PROC_ENTRIES: 1165 a->donotstash = PETSC_TRUE; 1166 break; 1167 case MAT_NO_NEW_DIAGONALS: 1168 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1169 case MAT_SYMMETRIC: 1170 case MAT_STRUCTURALLY_SYMMETRIC: 1171 case MAT_HERMITIAN: 1172 case MAT_SYMMETRY_ETERNAL: 1173 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1174 break; 1175 case MAT_NOT_SYMMETRIC: 1176 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1177 case MAT_NOT_HERMITIAN: 1178 case MAT_NOT_SYMMETRY_ETERNAL: 1179 break; 1180 default: 1181 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatGetRow_MPIAIJ" 1188 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1189 { 1190 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1191 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1192 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1193 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1194 int *cmap,*idx_p; 1195 1196 PetscFunctionBegin; 1197 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1198 mat->getrowactive = PETSC_TRUE; 1199 1200 if (!mat->rowvalues && (idx || v)) { 1201 /* 1202 allocate enough space to hold information from the longest row. 
1203 */ 1204 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1205 int max = 1,tmp; 1206 for (i=0; i<matin->m; i++) { 1207 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1208 if (max < tmp) { max = tmp; } 1209 } 1210 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1211 mat->rowindices = (int*)(mat->rowvalues + max); 1212 } 1213 1214 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1215 lrow = row - rstart; 1216 1217 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1218 if (!v) {pvA = 0; pvB = 0;} 1219 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1220 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1221 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1222 nztot = nzA + nzB; 1223 1224 cmap = mat->garray; 1225 if (v || idx) { 1226 if (nztot) { 1227 /* Sort by increasing column numbers, assuming A and B already sorted */ 1228 int imark = -1; 1229 if (v) { 1230 *v = v_p = mat->rowvalues; 1231 for (i=0; i<nzB; i++) { 1232 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1233 else break; 1234 } 1235 imark = i; 1236 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1237 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1238 } 1239 if (idx) { 1240 *idx = idx_p = mat->rowindices; 1241 if (imark > -1) { 1242 for (i=0; i<imark; i++) { 1243 idx_p[i] = cmap[cworkB[i]]; 1244 } 1245 } else { 1246 for (i=0; i<nzB; i++) { 1247 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1248 else break; 1249 } 1250 imark = i; 1251 } 1252 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1253 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1254 } 1255 } else { 1256 if (idx) *idx = 0; 1257 if (v) *v = 0; 1258 } 1259 } 1260 *nz = nztot; 1261 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1262 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1268 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1269 { 1270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1271 1272 PetscFunctionBegin; 1273 if (aij->getrowactive == PETSC_FALSE) { 1274 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1275 } 1276 aij->getrowactive = PETSC_FALSE; 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatNorm_MPIAIJ" 1282 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1283 { 1284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1285 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1286 int ierr,i,j,cstart = aij->cstart; 1287 PetscReal sum = 0.0; 1288 PetscScalar *v; 1289 1290 PetscFunctionBegin; 1291 if (aij->size == 1) { 1292 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1293 } else { 1294 if (type == NORM_FROBENIUS) { 1295 v = amat->a; 1296 for (i=0; i<amat->nz; i++) { 1297 #if defined(PETSC_USE_COMPLEX) 1298 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1299 #else 1300 sum += (*v)*(*v); v++; 1301 #endif 1302 } 1303 v = bmat->a; 1304 for (i=0; i<bmat->nz; i++) { 1305 #if defined(PETSC_USE_COMPLEX) 1306 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1307 #else 1308 sum += (*v)*(*v); v++; 1309 #endif 1310 } 1311 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1312 *norm = sqrt(*norm); 1313 } else if (type == NORM_1) { /* max column norm */ 1314 PetscReal *tmp,*tmp2; 1315 int 
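/*
   MatGetRow_MPIAIJ() above splices a row back into global column order: first the
   B-entries whose global column (cmap[...]) is below cstart, then the A-entries
   shifted by cstart, then the remaining B-entries.  An illustrative caller, iterating
   only over locally owned rows as the routine requires:

      int         row,rstart,rend,ncols,*cols,ierr;
      PetscScalar *vals;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... use ncols, cols[], vals[] for this global row ...
        ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }

   Only one row may be held at a time (note the getrowactive guard above), since the
   returned arrays live in the shared mat->rowindices / mat->rowvalues workspace.
*/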
*jj,*garray = aij->garray; 1316 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1317 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1318 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1319 *norm = 0.0; 1320 v = amat->a; jj = amat->j; 1321 for (j=0; j<amat->nz; j++) { 1322 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1323 } 1324 v = bmat->a; jj = bmat->j; 1325 for (j=0; j<bmat->nz; j++) { 1326 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1327 } 1328 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1329 for (j=0; j<mat->N; j++) { 1330 if (tmp2[j] > *norm) *norm = tmp2[j]; 1331 } 1332 ierr = PetscFree(tmp);CHKERRQ(ierr); 1333 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1334 } else if (type == NORM_INFINITY) { /* max row norm */ 1335 PetscReal ntemp = 0.0; 1336 for (j=0; j<aij->A->m; j++) { 1337 v = amat->a + amat->i[j]; 1338 sum = 0.0; 1339 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1340 sum += PetscAbsScalar(*v); v++; 1341 } 1342 v = bmat->a + bmat->i[j]; 1343 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1344 sum += PetscAbsScalar(*v); v++; 1345 } 1346 if (sum > ntemp) ntemp = sum; 1347 } 1348 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1349 } else { 1350 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1351 } 1352 } 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatTranspose_MPIAIJ" 1358 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1359 { 1360 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1361 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1362 int ierr; 1363 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1364 Mat B; 1365 PetscScalar *array; 1366 1367 PetscFunctionBegin; 1368 if (!matout && M != N) { 1369 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1370 } 1371 1372 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1373 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1374 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1375 1376 /* copy over the A part */ 1377 Aloc = (Mat_SeqAIJ*)a->A->data; 1378 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1379 row = a->rstart; 1380 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1381 for (i=0; i<m; i++) { 1382 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1383 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1384 } 1385 aj = Aloc->j; 1386 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1387 1388 /* copy over the B part */ 1389 Aloc = (Mat_SeqAIJ*)a->B->data; 1390 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1391 row = a->rstart; 1392 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1393 ct = cols; 1394 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1395 for (i=0; i<m; i++) { 1396 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1397 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1398 } 1399 ierr = PetscFree(ct);CHKERRQ(ierr); 1400 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1401 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1402 if (matout) { 1403 *matout = B; 1404 } else { 1405 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1412 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1413 { 1414 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1415 Mat a = aij->A,b = aij->B; 1416 int 
ierr,s1,s2,s3; 1417 1418 PetscFunctionBegin; 1419 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1420 if (rr) { 1421 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1422 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1423 /* Overlap communication with computation. */ 1424 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1425 } 1426 if (ll) { 1427 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1428 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1429 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1430 } 1431 /* scale the diagonal block */ 1432 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1433 1434 if (rr) { 1435 /* Do a scatter end and then right scale the off-diagonal block */ 1436 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1437 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1438 } 1439 1440 PetscFunctionReturn(0); 1441 } 1442 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1446 int MatPrintHelp_MPIAIJ(Mat A) 1447 { 1448 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1449 int ierr; 1450 1451 PetscFunctionBegin; 1452 if (!a->rank) { 1453 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1460 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1461 { 1462 PetscFunctionBegin; 1463 *bs = 1; 1464 PetscFunctionReturn(0); 1465 } 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1468 int MatSetUnfactored_MPIAIJ(Mat A) 1469 { 1470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1471 int ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatEqual_MPIAIJ" 1480 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1481 { 1482 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1483 Mat a,b,c,d; 1484 PetscTruth flg; 1485 int ierr; 1486 1487 PetscFunctionBegin; 1488 a = matA->A; b = matA->B; 1489 c = matB->A; d = matB->B; 1490 1491 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1492 if (flg == PETSC_TRUE) { 1493 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1494 } 1495 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "MatCopy_MPIAIJ" 1501 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1502 { 1503 int ierr; 1504 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1505 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1506 1507 PetscFunctionBegin; 1508 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1509 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1510 /* because of the column compression in the off-processor part of the matrix a->B, 1511 the number of columns in a->B and b->B may be different, hence we cannot call 1512 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1513 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1514 then copying the submatrices */ 1515 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1516 } else { 1517 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1518 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1519 } 1520 PetscFunctionReturn(0); 1521 } 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1525 int MatSetUpPreallocation_MPIAIJ(Mat A) 1526 { 1527 int ierr; 1528 1529 PetscFunctionBegin; 1530 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #include "petscblaslapack.h" 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatAXPY_MPIAIJ" 1537 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1538 { 1539 int ierr,one=1,i; 1540 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1541 Mat_SeqAIJ *x,*y; 1542 1543 PetscFunctionBegin; 1544 if (str == SAME_NONZERO_PATTERN) { 1545 x = (Mat_SeqAIJ *)xx->A->data; 1546 y = (Mat_SeqAIJ *)yy->A->data; 1547 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1548 x = (Mat_SeqAIJ *)xx->B->data; 1549 y = (Mat_SeqAIJ *)yy->B->data; 1550 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1551 } else if (str == SUBSET_NONZERO_PATTERN) { 1552 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1553 1554 x = (Mat_SeqAIJ *)xx->B->data; 1555 y = (Mat_SeqAIJ *)yy->B->data; 1556 if (y->xtoy && y->XtoY != xx->B) { 1557 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1558 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1559 } 1560 if (!y->xtoy) { /* get xtoy */ 1561 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1562 y->XtoY = xx->B; 1563 } 1564 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1565 } else { 1566 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 /* -------------------------------------------------------------------*/ 1572 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1573 MatGetRow_MPIAIJ, 1574 MatRestoreRow_MPIAIJ, 1575 MatMult_MPIAIJ, 1576 /* 4*/ MatMultAdd_MPIAIJ, 1577 MatMultTranspose_MPIAIJ, 1578 MatMultTransposeAdd_MPIAIJ, 1579 0, 1580 0, 1581 0, 1582 /*10*/ 0, 1583 0, 1584 0, 1585 MatRelax_MPIAIJ, 1586 MatTranspose_MPIAIJ, 1587 /*15*/ MatGetInfo_MPIAIJ, 1588 MatEqual_MPIAIJ, 1589 MatGetDiagonal_MPIAIJ, 1590 MatDiagonalScale_MPIAIJ, 1591 MatNorm_MPIAIJ, 1592 /*20*/ MatAssemblyBegin_MPIAIJ, 1593 MatAssemblyEnd_MPIAIJ, 1594 0, 1595 MatSetOption_MPIAIJ, 1596 MatZeroEntries_MPIAIJ, 1597 /*25*/ MatZeroRows_MPIAIJ, 1598 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1599 MatLUFactorSymbolic_MPIAIJ_TFS, 1600 #else 1601 0, 1602 #endif 1603 0, 1604 0, 1605 0, 1606 /*30*/ MatSetUpPreallocation_MPIAIJ, 1607 0, 1608 0, 1609 0, 1610 0, 1611 /*35*/ MatDuplicate_MPIAIJ, 1612 0, 1613 0, 1614 0, 1615 0, 1616 /*40*/ MatAXPY_MPIAIJ, 1617 MatGetSubMatrices_MPIAIJ, 1618 MatIncreaseOverlap_MPIAIJ, 1619 MatGetValues_MPIAIJ, 1620 MatCopy_MPIAIJ, 1621 /*45*/ MatPrintHelp_MPIAIJ, 1622 MatScale_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 /*50*/ MatGetBlockSize_MPIAIJ, 1627 0, 1628 0, 1629 0, 1630 0, 1631 /*55*/ MatFDColoringCreate_MPIAIJ, 1632 0, 1633 MatSetUnfactored_MPIAIJ, 1634 0, 1635 0, 1636 /*60*/ MatGetSubMatrix_MPIAIJ, 1637 MatDestroy_MPIAIJ, 1638 MatView_MPIAIJ, 1639 MatGetPetscMaps_Petsc, 1640 0, 1641 /*65*/ 0, 1642 0, 1643 0, 1644 0, 1645 0, 1646 /*70*/ 0, 1647 0, 1648 
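/*
   For SUBSET_NONZERO_PATTERN, MatAXPY_MPIAIJ() above caches an index map y->xtoy with
   one entry per nonzero of X, giving that nonzero's position inside Y's value array.
   The update is then a simple gather-add, roughly:

      for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a) * x->a[i];

   The map is rebuilt (via MatAXPYGetxtoy_Private) only when Y has not seen this X
   before, which is what the y->XtoY back-pointer check is guarding.
*/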
MatSetColoring_MPIAIJ, 1649 MatSetValuesAdic_MPIAIJ, 1650 MatSetValuesAdifor_MPIAIJ, 1651 /*75*/ 0, 1652 0, 1653 0, 1654 0, 1655 0, 1656 /*80*/ 0, 1657 0, 1658 0, 1659 0, 1660 /*85*/ MatLoad_MPIAIJ, 1661 0, 1662 0, 1663 0, 1664 0, 1665 /*90*/ 0, 1666 MatMatMult_MPIAIJ_MPIAIJ, 1667 }; 1668 1669 /* ----------------------------------------------------------------------------------------*/ 1670 1671 EXTERN_C_BEGIN 1672 #undef __FUNCT__ 1673 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1674 int MatStoreValues_MPIAIJ(Mat mat) 1675 { 1676 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1677 int ierr; 1678 1679 PetscFunctionBegin; 1680 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1681 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 EXTERN_C_END 1685 1686 EXTERN_C_BEGIN 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1689 int MatRetrieveValues_MPIAIJ(Mat mat) 1690 { 1691 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1692 int ierr; 1693 1694 PetscFunctionBegin; 1695 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1696 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 EXTERN_C_END 1700 1701 #include "petscpc.h" 1702 EXTERN_C_BEGIN 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1705 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1706 { 1707 Mat_MPIAIJ *b; 1708 int ierr,i; 1709 1710 PetscFunctionBegin; 1711 B->preallocated = PETSC_TRUE; 1712 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1713 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1714 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1715 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1716 if (d_nnz) { 1717 for (i=0; i<B->m; i++) { 1718 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1719 } 1720 } 1721 if (o_nnz) { 1722 for (i=0; i<B->m; i++) { 1723 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1724 } 1725 } 1726 b = (Mat_MPIAIJ*)B->data; 1727 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1728 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1729 1730 PetscFunctionReturn(0); 1731 } 1732 EXTERN_C_END 1733 1734 /*MC 1735 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1736 1737 Options Database Keys: 1738 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1739 1740 Level: beginner 1741 1742 .seealso: MatCreateMPIAIJ 1743 M*/ 1744 1745 EXTERN_C_BEGIN 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatCreate_MPIAIJ" 1748 int MatCreate_MPIAIJ(Mat B) 1749 { 1750 Mat_MPIAIJ *b; 1751 int ierr,i,size; 1752 1753 PetscFunctionBegin; 1754 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1755 1756 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1757 B->data = (void*)b; 1758 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1759 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1760 B->factor = 0; 1761 B->assembled = PETSC_FALSE; 1762 B->mapping = 0; 1763 1764 B->insertmode = NOT_SET_VALUES; 1765 b->size = size; 1766 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1767 1768 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1769 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1770 1771 /* the information in the maps duplicates the information computed below, eventually 1772 we should remove the duplicate information that is not contained in the maps */ 1773 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1774 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1775 1776 /* build local table of row and column ownerships */ 1777 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1778 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1779 b->cowners = b->rowners + b->size + 2; 1780 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1781 b->rowners[0] = 0; 1782 for (i=2; i<=b->size; i++) { 1783 b->rowners[i] += b->rowners[i-1]; 1784 } 1785 b->rstart = b->rowners[b->rank]; 1786 b->rend = b->rowners[b->rank+1]; 1787 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1788 b->cowners[0] = 0; 1789 for (i=2; i<=b->size; i++) { 1790 b->cowners[i] += b->cowners[i-1]; 1791 } 1792 b->cstart = b->cowners[b->rank]; 1793 b->cend = b->cowners[b->rank+1]; 1794 1795 /* build cache for off array entries formed */ 1796 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1797 b->donotstash = PETSC_FALSE; 1798 b->colmap = 0; 1799 b->garray = 0; 1800 b->roworiented = PETSC_TRUE; 1801 1802 /* stuff used for matrix vector multiply */ 1803 b->lvec = PETSC_NULL; 1804 b->Mvctx = PETSC_NULL; 1805 1806 /* stuff for MatGetRow() */ 1807 b->rowindices = 0; 1808 b->rowvalues = 0; 1809 b->getrowactive = PETSC_FALSE; 1810 1811 /* Explicitly create 2 MATSEQAIJ matrices. 
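     b->A will hold this process's "diagonal" block (B->m by B->n) and b->B the
     off-diagonal block.  Note that b->B is created below with the full global column
     width B->N; during assembly its columns are compressed, with b->garray recording
     the global column index of each retained local column (b->garray is what
     MatMPIAIJGetSeqAIJ() returns as the colmap).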
*/ 1812 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1813 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1814 PetscLogObjectParent(B,b->A); 1815 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1816 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1817 PetscLogObjectParent(B,b->B); 1818 1819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1820 "MatStoreValues_MPIAIJ", 1821 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1822 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1823 "MatRetrieveValues_MPIAIJ", 1824 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1826 "MatGetDiagonalBlock_MPIAIJ", 1827 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1829 "MatIsTranspose_MPIAIJ", 1830 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1831 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1832 "MatMPIAIJSetPreallocation_MPIAIJ", 1833 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1835 "MatDiagonalScaleLocal_MPIAIJ", 1836 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 EXTERN_C_END 1840 1841 /*MC 1842 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1843 1844 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1845 and MATMPIAIJ otherwise. As a result, for single process communicators, 1846 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1847 for communicators controlling multiple processes. It is recommended that you call both of 1848 the above preallocation routines for simplicity. 1849 1850 Options Database Keys: 1851 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1852 1853 Level: beginner 1854 1855 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1856 M*/ 1857 1858 EXTERN_C_BEGIN 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatCreate_AIJ" 1861 int MatCreate_AIJ(Mat A) { 1862 int ierr,size; 1863 1864 PetscFunctionBegin; 1865 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1866 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1867 if (size == 1) { 1868 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1869 } else { 1870 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1871 } 1872 PetscFunctionReturn(0); 1873 } 1874 EXTERN_C_END 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1878 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1879 { 1880 Mat mat; 1881 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1882 int ierr; 1883 1884 PetscFunctionBegin; 1885 *newmat = 0; 1886 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1887 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1888 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1889 a = (Mat_MPIAIJ*)mat->data; 1890 1891 mat->factor = matin->factor; 1892 mat->assembled = PETSC_TRUE; 1893 mat->insertmode = NOT_SET_VALUES; 1894 mat->preallocated = PETSC_TRUE; 1895 1896 a->rstart = oldmat->rstart; 1897 a->rend = oldmat->rend; 1898 a->cstart = oldmat->cstart; 1899 a->cend = oldmat->cend; 1900 a->size = oldmat->size; 1901 a->rank = oldmat->rank; 1902 a->donotstash = oldmat->donotstash; 1903 a->roworiented = oldmat->roworiented; 1904 a->rowindices = 0; 1905 a->rowvalues = 0; 1906 a->getrowactive = PETSC_FALSE; 1907 1908 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1909 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1910 if (oldmat->colmap) { 1911 #if defined (PETSC_USE_CTABLE) 1912 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1913 #else 1914 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1915 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1916 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1917 #endif 1918 } else a->colmap = 0; 1919 if (oldmat->garray) { 1920 int len; 1921 len = oldmat->B->n; 1922 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1923 PetscLogObjectMemory(mat,len*sizeof(int)); 1924 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1925 } else a->garray = 0; 1926 1927 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1928 PetscLogObjectParent(mat,a->lvec); 1929 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1930 PetscLogObjectParent(mat,a->Mvctx); 1931 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1932 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1933 PetscLogObjectParent(mat,a->A); 1934 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1935 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1936 PetscLogObjectParent(mat,a->B); 1937 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1938 *newmat = mat; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #include "petscsys.h" 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatLoad_MPIAIJ" 1946 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1947 { 1948 Mat A; 1949 PetscScalar *vals,*svals; 1950 MPI_Comm comm = ((PetscObject)viewer)->comm; 1951 MPI_Status status; 
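  /* Typical caller-side usage that reaches this routine through MatLoad() --
     a minimal sketch only; the file name is illustrative, and PETSC_FILE_RDONLY is
     assumed to be the read-only counterpart of the PETSC_FILE_CREATE mode used in
     MatFileSplit() below:

         PetscViewer fd;
         Mat         A;
         ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&fd);CHKERRQ(ierr);
         ierr = MatLoad(fd,MATMPIAIJ,&A);CHKERRQ(ierr);
         ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
  */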
1952 int i,nz,ierr,j,rstart,rend,fd; 1953 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1954 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1955 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1956 1957 PetscFunctionBegin; 1958 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1959 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1960 if (!rank) { 1961 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1962 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1963 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1964 if (header[3] < 0) { 1965 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1966 } 1967 } 1968 1969 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1970 M = header[1]; N = header[2]; 1971 /* determine ownership of all rows */ 1972 m = M/size + ((M % size) > rank); 1973 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1974 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1975 rowners[0] = 0; 1976 for (i=2; i<=size; i++) { 1977 rowners[i] += rowners[i-1]; 1978 } 1979 rstart = rowners[rank]; 1980 rend = rowners[rank+1]; 1981 1982 /* distribute row lengths to all processors */ 1983 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1984 offlens = ourlens + (rend-rstart); 1985 if (!rank) { 1986 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1987 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1988 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1989 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1990 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1991 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1992 } else { 1993 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1994 } 1995 1996 if (!rank) { 1997 /* calculate the number of nonzeros on each processor */ 1998 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1999 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2000 for (i=0; i<size; i++) { 2001 for (j=rowners[i]; j< rowners[i+1]; j++) { 2002 procsnz[i] += rowlengths[j]; 2003 } 2004 } 2005 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2006 2007 /* determine max buffer needed and allocate it */ 2008 maxnz = 0; 2009 for (i=0; i<size; i++) { 2010 maxnz = PetscMax(maxnz,procsnz[i]); 2011 } 2012 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2013 2014 /* read in my part of the matrix column indices */ 2015 nz = procsnz[0]; 2016 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2017 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2018 2019 /* read in every one elses and ship off */ 2020 for (i=1; i<size; i++) { 2021 nz = procsnz[i]; 2022 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2023 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2024 } 2025 ierr = PetscFree(cols);CHKERRQ(ierr); 2026 } else { 2027 /* determine buffer space needed for message */ 2028 nz = 0; 2029 for (i=0; i<m; i++) { 2030 nz += ourlens[i]; 2031 } 2032 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2033 2034 /* receive message of column indices*/ 2035 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2036 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2037 if (maxnz != nz) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2038 } 2039 2040 /* determine column ownership if matrix is not square */ 2041 if (N != M) { 2042 n = N/size + ((N % size) > rank); 2043 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2044 cstart = cend - n; 2045 } else { 2046 cstart = rstart; 2047 cend = rend; 2048 n = cend - cstart; 2049 } 2050 2051 /* loop over local rows, determining number of off diagonal entries */ 2052 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2053 jj = 0; 2054 for (i=0; i<m; i++) { 2055 for (j=0; j<ourlens[i]; j++) { 2056 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2057 jj++; 2058 } 2059 } 2060 2061 /* create our matrix */ 2062 for (i=0; i<m; i++) { 2063 ourlens[i] -= offlens[i]; 2064 } 2065 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2066 ierr = MatSetType(A,type);CHKERRQ(ierr); 2067 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2068 2069 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2070 for (i=0; i<m; i++) { 2071 ourlens[i] += offlens[i]; 2072 } 2073 2074 if (!rank) { 2075 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2076 2077 /* read in my part of the matrix numerical values */ 2078 nz = procsnz[0]; 2079 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2080 2081 /* insert into matrix */ 2082 jj = rstart; 2083 smycols = mycols; 2084 svals = vals; 2085 for (i=0; i<m; i++) { 2086 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2087 smycols += ourlens[i]; 2088 svals += ourlens[i]; 2089 jj++; 2090 } 2091 2092 /* read in other processors and ship out */ 2093 for (i=1; i<size; i++) { 2094 nz = procsnz[i]; 2095 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2096 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2097 } 2098 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2099 } else { 2100 /* receive numeric values */ 2101 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2102 2103 /* receive message of values*/ 2104 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2105 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2106 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2107 2108 /* insert into matrix */ 2109 jj = rstart; 2110 smycols = mycols; 2111 svals = vals; 2112 for (i=0; i<m; i++) { 2113 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2114 smycols += ourlens[i]; 2115 svals += ourlens[i]; 2116 jj++; 2117 } 2118 } 2119 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2120 ierr = PetscFree(vals);CHKERRQ(ierr); 2121 ierr = PetscFree(mycols);CHKERRQ(ierr); 2122 ierr = PetscFree(rowners);CHKERRQ(ierr); 2123 2124 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2125 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2126 *newmat = A; 2127 PetscFunctionReturn(0); 2128 } 2129 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2132 /* 2133 Not great since it makes two copies of the submatrix, first an SeqAIJ 2134 in local and then by concatenating the local matrices the end result. 
2135 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2136 */ 2137 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2138 { 2139 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2140 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2141 Mat *local,M,Mreuse; 2142 PetscScalar *vwork,*aa; 2143 MPI_Comm comm = mat->comm; 2144 Mat_SeqAIJ *aij; 2145 2146 2147 PetscFunctionBegin; 2148 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2149 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2150 2151 if (call == MAT_REUSE_MATRIX) { 2152 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2153 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2154 local = &Mreuse; 2155 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2156 } else { 2157 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2158 Mreuse = *local; 2159 ierr = PetscFree(local);CHKERRQ(ierr); 2160 } 2161 2162 /* 2163 m - number of local rows 2164 n - number of columns (same on all processors) 2165 rstart - first row in new global matrix generated 2166 */ 2167 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2168 if (call == MAT_INITIAL_MATRIX) { 2169 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2170 ii = aij->i; 2171 jj = aij->j; 2172 2173 /* 2174 Determine the number of non-zeros in the diagonal and off-diagonal 2175 portions of the matrix in order to do correct preallocation 2176 */ 2177 2178 /* first get start and end of "diagonal" columns */ 2179 if (csize == PETSC_DECIDE) { 2180 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2181 if (mglobal == n) { /* square matrix */ 2182 nlocal = m; 2183 } else { 2184 nlocal = n/size + ((n % size) > rank); 2185 } 2186 } else { 2187 nlocal = csize; 2188 } 2189 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2190 rstart = rend - nlocal; 2191 if (rank == size - 1 && rend != n) { 2192 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2193 } 2194 2195 /* next, compute all the lengths */ 2196 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2197 olens = dlens + m; 2198 for (i=0; i<m; i++) { 2199 jend = ii[i+1] - ii[i]; 2200 olen = 0; 2201 dlen = 0; 2202 for (j=0; j<jend; j++) { 2203 if (*jj < rstart || *jj >= rend) olen++; 2204 else dlen++; 2205 jj++; 2206 } 2207 olens[i] = olen; 2208 dlens[i] = dlen; 2209 } 2210 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2211 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2212 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2213 ierr = PetscFree(dlens);CHKERRQ(ierr); 2214 } else { 2215 int ml,nl; 2216 2217 M = *newmat; 2218 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2219 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2220 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2221 /* 2222 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2223 rather than the slower MatSetValues(). 
  */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.
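
   A call sequence matching the example above would look roughly as follows
   (a sketch only; the matrix A and communicator comm are assumed to be declared,
   and proc0's values are used for illustration):
.vb
      int d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};    /* proc0 values from the tables above */
      ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve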
2382 2383 Level: intermediate 2384 2385 .keywords: matrix, aij, compressed row, sparse, parallel 2386 2387 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2388 @*/ 2389 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2390 { 2391 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2392 2393 PetscFunctionBegin; 2394 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2395 if (f) { 2396 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatCreateMPIAIJ" 2403 /*@C 2404 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2405 (the default parallel PETSc format). For good matrix assembly performance 2406 the user should preallocate the matrix storage by setting the parameters 2407 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2408 performance can be increased by more than a factor of 50. 2409 2410 Collective on MPI_Comm 2411 2412 Input Parameters: 2413 + comm - MPI communicator 2414 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2415 This value should be the same as the local size used in creating the 2416 y vector for the matrix-vector product y = Ax. 2417 . n - This value should be the same as the local size used in creating the 2418 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2419 calculated if N is given) For square matrices n is almost always m. 2420 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2421 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2422 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2423 (same value is used for all local rows) 2424 . d_nnz - array containing the number of nonzeros in the various rows of the 2425 DIAGONAL portion of the local submatrix (possibly different for each row) 2426 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2427 The size of this array is equal to the number of local rows, i.e 'm'. 2428 You must leave room for the diagonal entry even if it is zero. 2429 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2430 submatrix (same value is used for all local rows). 2431 - o_nnz - array containing the number of nonzeros in the various rows of the 2432 OFF-DIAGONAL portion of the local submatrix (possibly different for 2433 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2434 structure. The size of this array is equal to the number 2435 of local rows, i.e 'm'. 2436 2437 Output Parameter: 2438 . A - the matrix 2439 2440 Notes: 2441 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2442 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2443 storage requirements for this matrix. 2444 2445 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2446 processor than it must be used on all processors that share the object for 2447 that argument. 2448 2449 The AIJ format (also called the Yale sparse matrix format or 2450 compressed row storage), is fully compatible with standard Fortran 77 2451 storage. That is, the stored row and column indices can begin at 2452 either one (as in Fortran) or zero. See the users manual for details. 2453 2454 The user MUST specify either the local or global matrix dimensions 2455 (possibly both). 
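
   For example, with the 8x8 matrix of the example below split over three processes
   (3, 3, and 2 local rows), either of the following calls may be used (a sketch;
   the preallocation values 3 and 2 are illustrative):
.vb
      ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
   where in the first call each process passes its own local sizes m,n (here 3,3,2).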

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2534 In this case, the values of d_nz,o_nz are: 2535 .vb 2536 proc0 : dnz = 2, o_nz = 2 2537 proc1 : dnz = 3, o_nz = 2 2538 proc2 : dnz = 1, o_nz = 4 2539 .ve 2540 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2541 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2542 for proc3. i.e we are using 12+15+10=37 storage locations to store 2543 34 values. 2544 2545 When d_nnz, o_nnz parameters are specified, the storage is specified 2546 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2547 In the above case the values for d_nnz,o_nnz are: 2548 .vb 2549 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2550 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2551 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2552 .ve 2553 Here the space allocated is sum of all the above values i.e 34, and 2554 hence pre-allocation is perfect. 2555 2556 Level: intermediate 2557 2558 .keywords: matrix, aij, compressed row, sparse, parallel 2559 2560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2561 @*/ 2562 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2563 { 2564 int ierr,size; 2565 2566 PetscFunctionBegin; 2567 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2568 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2569 if (size > 1) { 2570 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2571 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2572 } else { 2573 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2574 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2575 } 2576 PetscFunctionReturn(0); 2577 } 2578 2579 #undef __FUNCT__ 2580 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2581 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2582 { 2583 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2584 PetscFunctionBegin; 2585 *Ad = a->A; 2586 *Ao = a->B; 2587 *colmap = a->garray; 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2593 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2594 { 2595 int ierr,i; 2596 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2597 2598 PetscFunctionBegin; 2599 if (coloring->ctype == IS_COLORING_LOCAL) { 2600 ISColoringValue *allcolors,*colors; 2601 ISColoring ocoloring; 2602 2603 /* set coloring for diagonal portion */ 2604 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2605 2606 /* set coloring for off-diagonal portion */ 2607 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2608 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2609 for (i=0; i<a->B->n; i++) { 2610 colors[i] = allcolors[a->garray[i]]; 2611 } 2612 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2613 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2614 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2615 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2616 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2617 ISColoringValue *colors; 2618 int *larray; 2619 ISColoring ocoloring; 2620 2621 /* set coloring for diagonal portion */ 2622 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2623 for (i=0; i<a->A->n; i++) { 2624 larray[i] = i + a->cstart; 2625 } 2626 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2627 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2628 for 
(i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large parallel PETSc matrix by concatenating the
   sequential matrices supplied by the individual processors

   Collective on MPI_Comm

   Input Parameters:
+  comm  - the communicator the parallel matrix will live on
-  inmat - the input sequential matrix on this process

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of EACH of the separate (per-process) input matrices
   MUST be the same.
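
   The input matrix is destroyed by this routine once its entries have been copied
   into the parallel matrix.  A minimal usage sketch (the names and sizes are
   illustrative only):
.vb
      Mat seq,par;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,5,PETSC_NULL,&seq);CHKERRQ(ierr);
      /* ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() on seq ... */
      ierr = MatMerge(PETSC_COMM_WORLD,seq,&par);CHKERRQ(ierr);   /* seq is destroyed here */
.ve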
2699 2700 @*/ 2701 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2702 { 2703 int ierr,m,n,i,rstart,nnz,I,*dnz,*onz; 2704 const int *indx; 2705 const PetscScalar *values; 2706 PetscMap columnmap,rowmap; 2707 2708 PetscFunctionBegin; 2709 2710 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2711 2712 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2713 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2714 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2715 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2716 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2717 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2718 2719 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2720 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2721 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2722 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2723 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2724 2725 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2726 for (i=0;i<m;i++) { 2727 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2728 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2729 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2730 } 2731 /* This routine will ONLY return MPIAIJ type matrix */ 2732 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2733 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2734 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2735 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2736 2737 for (i=0;i<m;i++) { 2738 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2739 I = i + rstart; 2740 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2741 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2742 } 2743 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2744 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2745 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2746 2747 PetscFunctionReturn(0); 2748 } 2749 2750 #undef __FUNCT__ 2751 #define __FUNCT__ "MatFileSplit" 2752 int MatFileSplit(Mat A,char *outfile) 2753 { 2754 int ierr,rank,len,m,N,i,rstart,nnz; 2755 const int *indx; 2756 PetscViewer out; 2757 char *name; 2758 Mat B; 2759 const PetscScalar *values; 2760 2761 PetscFunctionBegin; 2762 2763 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2764 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2765 /* Should this be the type of the diagonal block of A? 
  */
  ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
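
/*
   Example use of MatFileSplit() (a sketch; the base file name is illustrative):
   each process writes its own local rows of the parallel matrix A as a sequential
   matrix to a separate binary file, named by appending the process rank.

      ierr = MatFileSplit(A,"Asplit");CHKERRQ(ierr);   * writes Asplit.0, Asplit.1, ... *
*/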