1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 int CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 int n = aij->B->n,i,ierr; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 191 int cstart = aij->cstart,cend = aij->cend,row,col; 192 PetscTruth roworiented = 
aij->roworiented; 193 194 /* Some Variables required in the macro */ 195 Mat A = aij->A; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 197 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 198 PetscScalar *aa = a->a; 199 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 200 Mat B = aij->B; 201 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 202 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 203 PetscScalar *ba = b->a; 204 205 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 206 int nonew = a->nonew,shift=0; 207 PetscScalar *ap; 208 209 PetscFunctionBegin; 210 for (i=0; i<m; i++) { 211 if (im[i] < 0) continue; 212 #if defined(PETSC_USE_BOPT_g) 213 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 214 #endif 215 if (im[i] >= rstart && im[i] < rend) { 216 row = im[i] - rstart; 217 for (j=0; j<n; j++) { 218 if (in[j] >= cstart && in[j] < cend){ 219 col = in[j] - cstart; 220 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 221 if (ignorezeroentries && value == 0.0) continue; 222 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 223 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 224 } else if (in[j] < 0) continue; 225 #if defined(PETSC_USE_BOPT_g) 226 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 227 #endif 228 else { 229 if (mat->was_assembled) { 230 if (!aij->colmap) { 231 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 232 } 233 #if defined (PETSC_USE_CTABLE) 234 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 235 col--; 236 #else 237 col = aij->colmap[in[j]] - 1; 238 #endif 239 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 240 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 241 col = in[j]; 242 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 243 B = aij->B; 244 b = (Mat_SeqAIJ*)B->data; 245 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 246 ba = b->a; 247 } 248 } else col = in[j]; 249 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 250 if (ignorezeroentries && value == 0.0) continue; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (!aij->donotstash) { 257 if (roworiented) { 258 if (ignorezeroentries && v[i*n] == 0.0) continue; 259 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 260 } else { 261 if (ignorezeroentries && v[i] == 0.0) continue; 262 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetValues_MPIAIJ" 272 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 275 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 276 int cstart = aij->cstart,cend = aij->cend,row,col; 277 278 PetscFunctionBegin; 279 for (i=0; i<m; i++) { 280 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 281 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 282 if (idxm[i] >= rstart && idxm[i] < rend) { 283 row = idxm[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative 
column: %d",idxn[j]); 286 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 287 if (idxn[j] >= cstart && idxn[j] < cend){ 288 col = idxn[j] - cstart; 289 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 290 } else { 291 if (!aij->colmap) { 292 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 293 } 294 #if defined (PETSC_USE_CTABLE) 295 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 296 col --; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 318 int ierr,nstash,reallocs; 319 InsertMode addv; 320 321 PetscFunctionBegin; 322 if (aij->donotstash) { 323 PetscFunctionReturn(0); 324 } 325 326 /* make sure all processors are either in INSERTMODE or ADDMODE */ 327 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 328 if (addv == (ADD_VALUES|INSERT_VALUES)) { 329 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 330 } 331 mat->insertmode = addv; /* in case this processor had no cache */ 332 333 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 334 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 335 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 336 PetscFunctionReturn(0); 337 } 338 339 340 #undef __FUNCT__ 341 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 343 { 344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 345 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 346 int i,j,rstart,ncols,n,ierr,flg; 347 int *row,*col,other_disassembled; 348 PetscScalar *val; 349 InsertMode addv = mat->insertmode; 350 351 PetscFunctionBegin; 352 if (!aij->donotstash) { 353 while (1) { 354 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 355 if (!flg) break; 356 357 for (i=0; i<n;) { 358 /* Now identify the consecutive vals belonging to the same row */ 359 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 360 if (j < n) ncols = j-i; 361 else ncols = n-i; 362 /* Now assemble all these values with a single function call */ 363 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 364 i = j; 365 } 366 } 367 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 368 } 369 370 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 371 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 372 373 /* determine if any processor has disassembled, if so we must 374 also disassemble ourselfs, in order that we may reassemble. 
*/ 375 /* 376 if nonzero structure of submatrix B cannot change then we know that 377 no processor disassembled thus we can skip this stuff 378 */ 379 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 380 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 381 if (mat->was_assembled && !other_disassembled) { 382 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 383 } 384 } 385 386 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 387 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 390 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 391 392 if (aij->rowvalues) { 393 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 394 aij->rowvalues = 0; 395 } 396 397 /* used by MatAXPY() */ 398 a->xtoy = 0; b->xtoy = 0; 399 a->XtoY = 0; b->XtoY = 0; 400 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 422 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 423 int *nprocs,j,idx,nsends,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 PetscTruth found; 432 433 PetscFunctionBegin; 434 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 435 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 436 437 /* first count number of contributors to each processor */ 438 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 439 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 440 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 441 for (i=0; i<N; i++) { 442 idx = rows[i]; 443 found = PETSC_FALSE; 444 for (j=0; j<size; j++) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 450 } 451 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 452 453 /* inform other processors of number of messages and max length*/ 454 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 455 456 /* post receives: */ 457 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 458 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 459 for (i=0; i<nrecvs; i++) { 460 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 461 } 462 463 /* do sends: 464 1) starts[i] gives the starting index in svalues for stuff going to 465 the ith processor 466 */ 467 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 468 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 469 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 470 starts[0] = 0; 471 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 472 for (i=0; i<N; i++) { 473 svalues[starts[owner[i]]++] 
= rows[i]; 474 } 475 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 476 477 starts[0] = 0; 478 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 479 count = 0; 480 for (i=0; i<size; i++) { 481 if (nprocs[2*i+1]) { 482 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 483 } 484 } 485 ierr = PetscFree(starts);CHKERRQ(ierr); 486 487 base = owners[rank]; 488 489 /* wait on receives */ 490 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 491 source = lens + nrecvs; 492 count = nrecvs; slen = 0; 493 while (count) { 494 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 495 /* unpack receives into our local space */ 496 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 497 source[imdex] = recv_status.MPI_SOURCE; 498 lens[imdex] = n; 499 slen += n; 500 count--; 501 } 502 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 503 504 /* move the data into the send scatter */ 505 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 506 count = 0; 507 for (i=0; i<nrecvs; i++) { 508 values = rvalues + i*nmax; 509 for (j=0; j<lens[i]; j++) { 510 lrows[count++] = values[j] - base; 511 } 512 } 513 ierr = PetscFree(rvalues);CHKERRQ(ierr); 514 ierr = PetscFree(lens);CHKERRQ(ierr); 515 ierr = PetscFree(owner);CHKERRQ(ierr); 516 ierr = PetscFree(nprocs);CHKERRQ(ierr); 517 518 /* actually zap the local rows */ 519 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 520 PetscLogObjectParent(A,istmp); 521 522 /* 523 Zero the required rows. If the "diagonal block" of the matrix 524 is square and the user wishes to set the diagonal we use seperate 525 code so that MatSetValues() is not called for each diagonal allocating 526 new memory, thus calling lots of mallocs and slowing things down. 
527 528 Contributed by: Mathew Knepley 529 */ 530 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 531 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 532 if (diag && (l->A->M == l->A->N)) { 533 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 534 } else if (diag) { 535 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 536 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 537 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 538 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 539 } 540 for (i = 0; i < slen; i++) { 541 row = lrows[i] + rstart; 542 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 543 } 544 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 } else { 547 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 548 } 549 ierr = ISDestroy(istmp);CHKERRQ(ierr); 550 ierr = PetscFree(lrows);CHKERRQ(ierr); 551 552 /* wait on sends */ 553 if (nsends) { 554 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 555 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 556 ierr = PetscFree(send_status);CHKERRQ(ierr); 557 } 558 ierr = PetscFree(send_waits);CHKERRQ(ierr); 559 ierr = PetscFree(svalues);CHKERRQ(ierr); 560 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatMult_MPIAIJ" 566 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 567 { 568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 569 int ierr,nt; 570 571 PetscFunctionBegin; 572 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 573 if (nt != A->n) { 574 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 575 } 576 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 577 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 578 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 579 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMultAdd_MPIAIJ" 585 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 588 int ierr; 589 590 PetscFunctionBegin; 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 600 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 /* do nondiagonal part */ 607 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 608 /* send it on its way */ 609 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 610 /* do local part */ 611 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 612 /* receive remote parts: note this assumes the values are not actually */ 613 /* inserted in yy until the next line, which is true for my implementation*/ 614 /* but is not perhaps always true. 
*/ 615 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 EXTERN_C_BEGIN 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 622 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 623 { 624 MPI_Comm comm; 625 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 626 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 627 IS Me,Notme; 628 int M,N,first,last,*notme,ntids,i, ierr; 629 630 PetscFunctionBegin; 631 632 /* Easy test: symmetric diagonal block */ 633 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 634 ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr); 635 if (!*f) PetscFunctionReturn(0); 636 ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr); 637 ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr); 638 if (ntids==1) PetscFunctionReturn(0); 639 640 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 641 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 642 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 643 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 644 for (i=0; i<first; i++) notme[i] = i; 645 for (i=last; i<M; i++) notme[i-last+first] = i; 646 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 647 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 648 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 649 Aoff = Aoffs[0]; 650 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 651 Boff = Boffs[0]; 652 ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr); 653 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 654 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 655 ierr = ISDestroy(Me); CHKERRQ(ierr); 656 ierr = ISDestroy(Notme); CHKERRQ(ierr); 657 658 PetscFunctionReturn(0); 659 } 660 EXTERN_C_END 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 664 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 665 { 666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 667 int ierr; 668 669 PetscFunctionBegin; 670 /* do nondiagonal part */ 671 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 672 /* send it on its way */ 673 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 674 /* do local part */ 675 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 676 /* receive remote parts: note this assumes the values are not actually */ 677 /* inserted in yy until the next line, which is true for my implementation*/ 678 /* but is not perhaps always true. 
*/ 679 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 /* 684 This only works correctly for square matrices where the subblock A->A is the 685 diagonal block 686 */ 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 689 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 690 { 691 int ierr; 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 694 PetscFunctionBegin; 695 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 696 if (a->rstart != a->cstart || a->rend != a->cend) { 697 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 698 } 699 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatScale_MPIAIJ" 705 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 706 { 707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 708 int ierr; 709 710 PetscFunctionBegin; 711 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 712 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatDestroy_MPIAIJ" 718 int MatDestroy_MPIAIJ(Mat mat) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 int ierr; 722 723 PetscFunctionBegin; 724 #if defined(PETSC_USE_LOG) 725 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 726 #endif 727 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 728 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 729 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 730 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 731 #if defined (PETSC_USE_CTABLE) 732 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 733 #else 734 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 735 #endif 736 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 737 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 738 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 739 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 740 ierr = PetscFree(aij);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MPIAIJ_Binary" 746 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 747 { 748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 749 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 750 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 751 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 752 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 753 PetscScalar *column_values; 754 755 PetscFunctionBegin; 756 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 757 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 758 nz = A->nz + B->nz; 759 if (rank == 0) { 760 header[0] = MAT_FILE_COOKIE; 761 header[1] = mat->M; 762 header[2] = mat->N; 763 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 764 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 765 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 766 /* get largest number of rows any processor has */ 767 rlen = mat->m; 768 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 769 for (i=1; i<size; i++) { 770 rlen = PetscMax(rlen,range[i+1] - range[i]); 771 } 772 } else { 773 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 774 rlen = mat->m; 775 } 776 777 /* load up the local row counts */ 778 ierr = 
PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 779 for (i=0; i<mat->m; i++) { 780 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 781 } 782 783 /* store the row lengths to the file */ 784 if (rank == 0) { 785 MPI_Status status; 786 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 787 for (i=1; i<size; i++) { 788 rlen = range[i+1] - range[i]; 789 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 790 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 791 } 792 } else { 793 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 794 } 795 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 796 797 /* load up the local column indices */ 798 nzmax = nz; /* )th processor needs space a largest processor needs */ 799 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 800 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 801 cnt = 0; 802 for (i=0; i<mat->m; i++) { 803 for (j=B->i[i]; j<B->i[i+1]; j++) { 804 if ( (col = garray[B->j[j]]) > cstart) break; 805 column_indices[cnt++] = col; 806 } 807 for (k=A->i[i]; k<A->i[i+1]; k++) { 808 column_indices[cnt++] = A->j[k] + cstart; 809 } 810 for (; j<B->i[i+1]; j++) { 811 column_indices[cnt++] = garray[B->j[j]]; 812 } 813 } 814 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 815 816 /* store the column indices to the file */ 817 if (rank == 0) { 818 MPI_Status status; 819 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 820 for (i=1; i<size; i++) { 821 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 823 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 824 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 825 } 826 } else { 827 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(column_indices);CHKERRQ(ierr); 831 832 /* load up the local column values */ 833 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 834 cnt = 0; 835 for (i=0; i<mat->m; i++) { 836 for (j=B->i[i]; j<B->i[i+1]; j++) { 837 if ( garray[B->j[j]] > cstart) break; 838 column_values[cnt++] = B->a[j]; 839 } 840 for (k=A->i[i]; k<A->i[i+1]; k++) { 841 column_values[cnt++] = A->a[k]; 842 } 843 for (; j<B->i[i+1]; j++) { 844 column_values[cnt++] = B->a[j]; 845 } 846 } 847 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 848 849 /* store the column values to the file */ 850 if (rank == 0) { 851 MPI_Status status; 852 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 853 for (i=1; i<size; i++) { 854 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 856 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 858 } 859 } else { 860 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 861 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(column_values);CHKERRQ(ierr); 864 
PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 869 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 872 int ierr,rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,isascii,flg,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (isascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 886 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 887 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 888 if (flg) { 889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 890 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 891 } else { 892 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 893 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 894 } 895 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 896 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 897 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 899 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 900 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } else if (format == PETSC_VIEWER_ASCII_INFO) { 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 905 PetscFunctionReturn(0); 906 } 907 } else if (isbinary) { 908 if (size == 1) { 909 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 910 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 911 } else { 912 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } else if (isdraw) { 916 PetscDraw draw; 917 PetscTruth isnull; 918 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 919 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 920 } 921 922 if (size == 1) { 923 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 924 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 925 } else { 926 /* assemble the entire matrix onto first processor. 
*/ 927 Mat A; 928 Mat_SeqAIJ *Aloc; 929 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 930 PetscScalar *a; 931 932 if (!rank) { 933 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 934 } else { 935 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 936 } 937 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 938 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 939 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 940 PetscLogObjectParent(mat,A); 941 942 /* copy over the A part */ 943 Aloc = (Mat_SeqAIJ*)aij->A->data; 944 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 945 row = aij->rstart; 946 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 947 for (i=0; i<m; i++) { 948 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 949 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 950 } 951 aj = Aloc->j; 952 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 953 954 /* copy over the B part */ 955 Aloc = (Mat_SeqAIJ*)aij->B->data; 956 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 957 row = aij->rstart; 958 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 959 ct = cols; 960 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 961 for (i=0; i<m; i++) { 962 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 963 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 964 } 965 ierr = PetscFree(ct);CHKERRQ(ierr); 966 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968 /* 969 Everyone has to call to draw the matrix since the graphics waits are 970 synchronized across all processors that share the PetscDraw object 971 */ 972 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 973 if (!rank) { 974 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 975 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 976 } 977 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 978 ierr = MatDestroy(A);CHKERRQ(ierr); 979 } 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatView_MPIAIJ" 985 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 986 { 987 int ierr; 988 PetscTruth isascii,isdraw,issocket,isbinary; 989 990 PetscFunctionBegin; 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 992 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 993 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 994 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 995 if (isascii || isdraw || isbinary || issocket) { 996 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 997 } else { 998 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatRelax_MPIAIJ" 1007 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1008 { 1009 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1010 int ierr; 1011 Vec bb1; 1012 PetscScalar mone=-1.0; 1013 1014 PetscFunctionBegin; 1015 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1016 1017 ierr = 
VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1018 1019 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1020 if (flag & SOR_ZERO_INITIAL_GUESS) { 1021 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1022 its--; 1023 } 1024 1025 while (its--) { 1026 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1027 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1028 1029 /* update rhs: bb1 = bb - B*x */ 1030 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1031 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1032 1033 /* local sweep */ 1034 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1035 CHKERRQ(ierr); 1036 } 1037 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1038 if (flag & SOR_ZERO_INITIAL_GUESS) { 1039 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1040 its--; 1041 } 1042 while (its--) { 1043 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1044 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1045 1046 /* update rhs: bb1 = bb - B*x */ 1047 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1048 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1049 1050 /* local sweep */ 1051 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1052 CHKERRQ(ierr); 1053 } 1054 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1055 if (flag & SOR_ZERO_INITIAL_GUESS) { 1056 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1057 its--; 1058 } 1059 while (its--) { 1060 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1061 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1062 1063 /* update rhs: bb1 = bb - B*x */ 1064 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1065 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1066 1067 /* local sweep */ 1068 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1069 CHKERRQ(ierr); 1070 } 1071 } else { 1072 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1073 } 1074 1075 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1082 { 1083 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1084 Mat A = mat->A,B = mat->B; 1085 int ierr; 1086 PetscReal isend[5],irecv[5]; 1087 1088 PetscFunctionBegin; 1089 info->block_size = 1.0; 1090 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1091 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1092 isend[3] = info->memory; isend[4] = info->mallocs; 1093 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1094 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1095 isend[3] += info->memory; isend[4] += info->mallocs; 1096 if (flag == MAT_LOCAL) { 1097 info->nz_used = isend[0]; 1098 info->nz_allocated = isend[1]; 1099 info->nz_unneeded = isend[2]; 1100 info->memory = isend[3]; 1101 info->mallocs = isend[4]; 1102 } else if (flag == MAT_GLOBAL_MAX) { 1103 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1104 info->nz_used 
= irecv[0]; 1105 info->nz_allocated = irecv[1]; 1106 info->nz_unneeded = irecv[2]; 1107 info->memory = irecv[3]; 1108 info->mallocs = irecv[4]; 1109 } else if (flag == MAT_GLOBAL_SUM) { 1110 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1111 info->nz_used = irecv[0]; 1112 info->nz_allocated = irecv[1]; 1113 info->nz_unneeded = irecv[2]; 1114 info->memory = irecv[3]; 1115 info->mallocs = irecv[4]; 1116 } 1117 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1118 info->fill_ratio_needed = 0; 1119 info->factor_mallocs = 0; 1120 info->rows_global = (double)matin->M; 1121 info->columns_global = (double)matin->N; 1122 info->rows_local = (double)matin->m; 1123 info->columns_local = (double)matin->N; 1124 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatSetOption_MPIAIJ" 1130 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 int ierr; 1134 1135 PetscFunctionBegin; 1136 switch (op) { 1137 case MAT_NO_NEW_NONZERO_LOCATIONS: 1138 case MAT_YES_NEW_NONZERO_LOCATIONS: 1139 case MAT_COLUMNS_UNSORTED: 1140 case MAT_COLUMNS_SORTED: 1141 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1142 case MAT_KEEP_ZEROED_ROWS: 1143 case MAT_NEW_NONZERO_LOCATION_ERR: 1144 case MAT_USE_INODES: 1145 case MAT_DO_NOT_USE_INODES: 1146 case MAT_IGNORE_ZERO_ENTRIES: 1147 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1148 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1149 break; 1150 case MAT_ROW_ORIENTED: 1151 a->roworiented = PETSC_TRUE; 1152 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1153 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1154 break; 1155 case MAT_ROWS_SORTED: 1156 case MAT_ROWS_UNSORTED: 1157 case MAT_YES_NEW_DIAGONALS: 1158 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1159 break; 1160 case MAT_COLUMN_ORIENTED: 1161 a->roworiented = PETSC_FALSE; 1162 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1163 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1164 break; 1165 case MAT_IGNORE_OFF_PROC_ENTRIES: 1166 a->donotstash = PETSC_TRUE; 1167 break; 1168 case MAT_NO_NEW_DIAGONALS: 1169 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1170 case MAT_SYMMETRIC: 1171 case MAT_STRUCTURALLY_SYMMETRIC: 1172 case MAT_HERMITIAN: 1173 case MAT_SYMMETRY_ETERNAL: 1174 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1175 break; 1176 case MAT_NOT_SYMMETRIC: 1177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1178 case MAT_NOT_HERMITIAN: 1179 case MAT_NOT_SYMMETRY_ETERNAL: 1180 break; 1181 default: 1182 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatGetRow_MPIAIJ" 1189 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1190 { 1191 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1192 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1193 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1194 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1195 int *cmap,*idx_p; 1196 1197 PetscFunctionBegin; 1198 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1199 mat->getrowactive = PETSC_TRUE; 1200 1201 if (!mat->rowvalues && (idx || v)) { 1202 /* 1203 allocate enough space to hold information from the longest row. 
1204 */ 1205 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1206 int max = 1,tmp; 1207 for (i=0; i<matin->m; i++) { 1208 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1209 if (max < tmp) { max = tmp; } 1210 } 1211 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1212 mat->rowindices = (int*)(mat->rowvalues + max); 1213 } 1214 1215 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1216 lrow = row - rstart; 1217 1218 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1219 if (!v) {pvA = 0; pvB = 0;} 1220 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1221 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1222 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1223 nztot = nzA + nzB; 1224 1225 cmap = mat->garray; 1226 if (v || idx) { 1227 if (nztot) { 1228 /* Sort by increasing column numbers, assuming A and B already sorted */ 1229 int imark = -1; 1230 if (v) { 1231 *v = v_p = mat->rowvalues; 1232 for (i=0; i<nzB; i++) { 1233 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1234 else break; 1235 } 1236 imark = i; 1237 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1238 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1239 } 1240 if (idx) { 1241 *idx = idx_p = mat->rowindices; 1242 if (imark > -1) { 1243 for (i=0; i<imark; i++) { 1244 idx_p[i] = cmap[cworkB[i]]; 1245 } 1246 } else { 1247 for (i=0; i<nzB; i++) { 1248 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1249 else break; 1250 } 1251 imark = i; 1252 } 1253 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1254 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1255 } 1256 } else { 1257 if (idx) *idx = 0; 1258 if (v) *v = 0; 1259 } 1260 } 1261 *nz = nztot; 1262 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1263 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1269 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1270 { 1271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1272 1273 PetscFunctionBegin; 1274 if (aij->getrowactive == PETSC_FALSE) { 1275 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1276 } 1277 aij->getrowactive = PETSC_FALSE; 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatNorm_MPIAIJ" 1283 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1287 int ierr,i,j,cstart = aij->cstart; 1288 PetscReal sum = 0.0; 1289 PetscScalar *v; 1290 1291 PetscFunctionBegin; 1292 if (aij->size == 1) { 1293 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1294 } else { 1295 if (type == NORM_FROBENIUS) { 1296 v = amat->a; 1297 for (i=0; i<amat->nz; i++) { 1298 #if defined(PETSC_USE_COMPLEX) 1299 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1300 #else 1301 sum += (*v)*(*v); v++; 1302 #endif 1303 } 1304 v = bmat->a; 1305 for (i=0; i<bmat->nz; i++) { 1306 #if defined(PETSC_USE_COMPLEX) 1307 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1308 #else 1309 sum += (*v)*(*v); v++; 1310 #endif 1311 } 1312 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1313 *norm = sqrt(*norm); 1314 } else if (type == NORM_1) { /* max column norm */ 1315 PetscReal *tmp,*tmp2; 1316 int 
*jj,*garray = aij->garray; 1317 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1318 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1319 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1320 *norm = 0.0; 1321 v = amat->a; jj = amat->j; 1322 for (j=0; j<amat->nz; j++) { 1323 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1324 } 1325 v = bmat->a; jj = bmat->j; 1326 for (j=0; j<bmat->nz; j++) { 1327 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1328 } 1329 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1330 for (j=0; j<mat->N; j++) { 1331 if (tmp2[j] > *norm) *norm = tmp2[j]; 1332 } 1333 ierr = PetscFree(tmp);CHKERRQ(ierr); 1334 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1335 } else if (type == NORM_INFINITY) { /* max row norm */ 1336 PetscReal ntemp = 0.0; 1337 for (j=0; j<aij->A->m; j++) { 1338 v = amat->a + amat->i[j]; 1339 sum = 0.0; 1340 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1341 sum += PetscAbsScalar(*v); v++; 1342 } 1343 v = bmat->a + bmat->i[j]; 1344 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1345 sum += PetscAbsScalar(*v); v++; 1346 } 1347 if (sum > ntemp) ntemp = sum; 1348 } 1349 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1350 } else { 1351 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1352 } 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatTranspose_MPIAIJ" 1359 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1360 { 1361 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1362 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1363 int ierr; 1364 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1365 Mat B; 1366 PetscScalar *array; 1367 1368 PetscFunctionBegin; 1369 if (!matout && M != N) { 1370 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1371 } 1372 1373 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1374 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1375 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1376 1377 /* copy over the A part */ 1378 Aloc = (Mat_SeqAIJ*)a->A->data; 1379 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1380 row = a->rstart; 1381 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1382 for (i=0; i<m; i++) { 1383 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1384 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1385 } 1386 aj = Aloc->j; 1387 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1388 1389 /* copy over the B part */ 1390 Aloc = (Mat_SeqAIJ*)a->B->data; 1391 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1392 row = a->rstart; 1393 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1394 ct = cols; 1395 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1396 for (i=0; i<m; i++) { 1397 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1398 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1399 } 1400 ierr = PetscFree(ct);CHKERRQ(ierr); 1401 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1402 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1403 if (matout) { 1404 *matout = B; 1405 } else { 1406 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1413 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1414 { 1415 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1416 Mat a = aij->A,b = aij->B; 1417 int 
ierr,s1,s2,s3; 1418 1419 PetscFunctionBegin; 1420 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1421 if (rr) { 1422 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1423 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1424 /* Overlap communication with computation. */ 1425 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1426 } 1427 if (ll) { 1428 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1429 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1430 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1431 } 1432 /* scale the diagonal block */ 1433 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1434 1435 if (rr) { 1436 /* Do a scatter end and then right scale the off-diagonal block */ 1437 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1438 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1439 } 1440 1441 PetscFunctionReturn(0); 1442 } 1443 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1447 int MatPrintHelp_MPIAIJ(Mat A) 1448 { 1449 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1450 int ierr; 1451 1452 PetscFunctionBegin; 1453 if (!a->rank) { 1454 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1461 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1462 { 1463 PetscFunctionBegin; 1464 *bs = 1; 1465 PetscFunctionReturn(0); 1466 } 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1469 int MatSetUnfactored_MPIAIJ(Mat A) 1470 { 1471 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1472 int ierr; 1473 1474 PetscFunctionBegin; 1475 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatEqual_MPIAIJ" 1481 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1482 { 1483 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1484 Mat a,b,c,d; 1485 PetscTruth flg; 1486 int ierr; 1487 1488 PetscFunctionBegin; 1489 a = matA->A; b = matA->B; 1490 c = matB->A; d = matB->B; 1491 1492 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1493 if (flg == PETSC_TRUE) { 1494 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1495 } 1496 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatCopy_MPIAIJ" 1502 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1503 { 1504 int ierr; 1505 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1506 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1507 1508 PetscFunctionBegin; 1509 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1510 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1511 /* because of the column compression in the off-processor part of the matrix a->B, 1512 the number of columns in a->B and b->B may be different, hence we cannot call 1513 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1514 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1515 then copying the submatrices */ 1516 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1517 } else { 1518 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1519 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1526 int MatSetUpPreallocation_MPIAIJ(Mat A) 1527 { 1528 int ierr; 1529 1530 PetscFunctionBegin; 1531 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #include "petscblaslapack.h" 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatAXPY_MPIAIJ" 1538 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1539 { 1540 int ierr,one=1,i; 1541 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1542 Mat_SeqAIJ *x,*y; 1543 1544 PetscFunctionBegin; 1545 if (str == SAME_NONZERO_PATTERN) { 1546 x = (Mat_SeqAIJ *)xx->A->data; 1547 y = (Mat_SeqAIJ *)yy->A->data; 1548 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1549 x = (Mat_SeqAIJ *)xx->B->data; 1550 y = (Mat_SeqAIJ *)yy->B->data; 1551 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1552 } else if (str == SUBSET_NONZERO_PATTERN) { 1553 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1554 1555 x = (Mat_SeqAIJ *)xx->B->data; 1556 y = (Mat_SeqAIJ *)yy->B->data; 1557 if (y->xtoy && y->XtoY != xx->B) { 1558 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1559 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1560 } 1561 if (!y->xtoy) { /* get xtoy */ 1562 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1563 y->XtoY = xx->B; 1564 } 1565 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1566 } else { 1567 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1568 } 1569 PetscFunctionReturn(0); 1570 } 1571 1572 /* -------------------------------------------------------------------*/ 1573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1574 MatGetRow_MPIAIJ, 1575 MatRestoreRow_MPIAIJ, 1576 MatMult_MPIAIJ, 1577 /* 4*/ MatMultAdd_MPIAIJ, 1578 MatMultTranspose_MPIAIJ, 1579 MatMultTransposeAdd_MPIAIJ, 1580 0, 1581 0, 1582 0, 1583 /*10*/ 0, 1584 0, 1585 0, 1586 MatRelax_MPIAIJ, 1587 MatTranspose_MPIAIJ, 1588 /*15*/ MatGetInfo_MPIAIJ, 1589 MatEqual_MPIAIJ, 1590 MatGetDiagonal_MPIAIJ, 1591 MatDiagonalScale_MPIAIJ, 1592 MatNorm_MPIAIJ, 1593 /*20*/ MatAssemblyBegin_MPIAIJ, 1594 MatAssemblyEnd_MPIAIJ, 1595 0, 1596 MatSetOption_MPIAIJ, 1597 MatZeroEntries_MPIAIJ, 1598 /*25*/ MatZeroRows_MPIAIJ, 1599 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1600 MatLUFactorSymbolic_MPIAIJ_TFS, 1601 #else 1602 0, 1603 #endif 1604 0, 1605 0, 1606 0, 1607 /*30*/ MatSetUpPreallocation_MPIAIJ, 1608 0, 1609 0, 1610 0, 1611 0, 1612 /*35*/ MatDuplicate_MPIAIJ, 1613 0, 1614 0, 1615 0, 1616 0, 1617 /*40*/ MatAXPY_MPIAIJ, 1618 MatGetSubMatrices_MPIAIJ, 1619 MatIncreaseOverlap_MPIAIJ, 1620 MatGetValues_MPIAIJ, 1621 MatCopy_MPIAIJ, 1622 /*45*/ MatPrintHelp_MPIAIJ, 1623 MatScale_MPIAIJ, 1624 0, 1625 0, 1626 0, 1627 /*50*/ MatGetBlockSize_MPIAIJ, 1628 0, 1629 0, 1630 0, 1631 0, 1632 /*55*/ MatFDColoringCreate_MPIAIJ, 1633 0, 1634 MatSetUnfactored_MPIAIJ, 1635 0, 1636 0, 1637 /*60*/ MatGetSubMatrix_MPIAIJ, 1638 MatDestroy_MPIAIJ, 1639 MatView_MPIAIJ, 1640 MatGetPetscMaps_Petsc, 1641 0, 1642 /*65*/ 0, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*70*/ 0, 1648 0, 1649 
MatSetColoring_MPIAIJ, 1650 MatSetValuesAdic_MPIAIJ, 1651 MatSetValuesAdifor_MPIAIJ, 1652 /*75*/ 0, 1653 0, 1654 0, 1655 0, 1656 0, 1657 /*80*/ 0, 1658 0, 1659 0, 1660 0, 1661 /*85*/ MatLoad_MPIAIJ}; 1662 1663 /* ----------------------------------------------------------------------------------------*/ 1664 1665 EXTERN_C_BEGIN 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1668 int MatStoreValues_MPIAIJ(Mat mat) 1669 { 1670 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1671 int ierr; 1672 1673 PetscFunctionBegin; 1674 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1675 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1676 PetscFunctionReturn(0); 1677 } 1678 EXTERN_C_END 1679 1680 EXTERN_C_BEGIN 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1683 int MatRetrieveValues_MPIAIJ(Mat mat) 1684 { 1685 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1686 int ierr; 1687 1688 PetscFunctionBegin; 1689 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1690 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 EXTERN_C_END 1694 1695 #include "petscpc.h" 1696 EXTERN_C_BEGIN 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1699 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1700 { 1701 Mat_MPIAIJ *b; 1702 int ierr,i; 1703 1704 PetscFunctionBegin; 1705 B->preallocated = PETSC_TRUE; 1706 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1707 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1708 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1709 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1710 if (d_nnz) { 1711 for (i=0; i<B->m; i++) { 1712 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1713 } 1714 } 1715 if (o_nnz) { 1716 for (i=0; i<B->m; i++) { 1717 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1718 } 1719 } 1720 b = (Mat_MPIAIJ*)B->data; 1721 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1722 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1723 1724 PetscFunctionReturn(0); 1725 } 1726 EXTERN_C_END 1727 1728 /*MC 1729 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1730 1731 Options Database Keys: 1732 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1733 1734 Level: beginner 1735 1736 .seealso: MatCreateMPIAIJ 1737 M*/ 1738 1739 EXTERN_C_BEGIN 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatCreate_MPIAIJ" 1742 int MatCreate_MPIAIJ(Mat B) 1743 { 1744 Mat_MPIAIJ *b; 1745 int ierr,i,size; 1746 1747 PetscFunctionBegin; 1748 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1749 1750 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1751 B->data = (void*)b; 1752 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1753 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1754 B->factor = 0; 1755 B->assembled = PETSC_FALSE; 1756 B->mapping = 0; 1757 1758 B->insertmode = NOT_SET_VALUES; 1759 b->size = size; 1760 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1761 1762 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1763 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1764 1765 /* the information in the maps duplicates the information computed below, eventually 1766 we should remove the duplicate information that is not contained in the maps */ 1767 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1768 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1769 1770 /* build local table of row and column ownerships */ 1771 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1772 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1773 b->cowners = b->rowners + b->size + 2; 1774 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1775 b->rowners[0] = 0; 1776 for (i=2; i<=b->size; i++) { 1777 b->rowners[i] += b->rowners[i-1]; 1778 } 1779 b->rstart = b->rowners[b->rank]; 1780 b->rend = b->rowners[b->rank+1]; 1781 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1782 b->cowners[0] = 0; 1783 for (i=2; i<=b->size; i++) { 1784 b->cowners[i] += b->cowners[i-1]; 1785 } 1786 b->cstart = b->cowners[b->rank]; 1787 b->cend = b->cowners[b->rank+1]; 1788 1789 /* build cache for off array entries formed */ 1790 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1791 b->donotstash = PETSC_FALSE; 1792 b->colmap = 0; 1793 b->garray = 0; 1794 b->roworiented = PETSC_TRUE; 1795 1796 /* stuff used for matrix vector multiply */ 1797 b->lvec = PETSC_NULL; 1798 b->Mvctx = PETSC_NULL; 1799 1800 /* stuff for MatGetRow() */ 1801 b->rowindices = 0; 1802 b->rowvalues = 0; 1803 b->getrowactive = PETSC_FALSE; 1804 1805 /* Explicitly create 2 MATSEQAIJ matrices. 
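     b->A will hold the "diagonal" block (the local rows and the locally
     owned columns) and b->B the "off-diagonal" block (the local rows and
     all remaining columns; its empty columns are compressed out when the
     matrix is assembled, see the garray/colmap fields).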
*/ 1806 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1807 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1808 PetscLogObjectParent(B,b->A); 1809 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1810 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1811 PetscLogObjectParent(B,b->B); 1812 1813 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1814 "MatStoreValues_MPIAIJ", 1815 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1816 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1817 "MatRetrieveValues_MPIAIJ", 1818 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1820 "MatGetDiagonalBlock_MPIAIJ", 1821 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1822 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1823 "MatIsTranspose_MPIAIJ", 1824 MatIsTranspose_MPIAIJ); CHKERRQ(ierr); 1825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1826 "MatMPIAIJSetPreallocation_MPIAIJ", 1827 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1829 "MatDiagonalScaleLocal_MPIAIJ", 1830 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 EXTERN_C_END 1834 1835 /*MC 1836 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1837 1838 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1839 and MATMPIAIJ otherwise. 1840 1841 Options Database Keys: 1842 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1843 1844 Level: beginner 1845 1846 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1847 M*/ 1848 1849 EXTERN_C_BEGIN 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatCreate_AIJ" 1852 int MatCreate_AIJ(Mat A) { 1853 int ierr,size; 1854 1855 PetscFunctionBegin; 1856 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1857 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1858 if (size == 1) { 1859 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1860 } else { 1861 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 EXTERN_C_END 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1869 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1870 { 1871 Mat mat; 1872 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1873 int ierr; 1874 1875 PetscFunctionBegin; 1876 *newmat = 0; 1877 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1878 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1879 a = (Mat_MPIAIJ*)mat->data; 1880 1881 mat->factor = matin->factor; 1882 mat->assembled = PETSC_TRUE; 1883 mat->insertmode = NOT_SET_VALUES; 1884 mat->preallocated = PETSC_TRUE; 1885 1886 a->rstart = oldmat->rstart; 1887 a->rend = oldmat->rend; 1888 a->cstart = oldmat->cstart; 1889 a->cend = oldmat->cend; 1890 a->size = oldmat->size; 1891 a->rank = oldmat->rank; 1892 a->donotstash = oldmat->donotstash; 1893 a->roworiented = oldmat->roworiented; 1894 a->rowindices = 0; 1895 a->rowvalues = 0; 1896 a->getrowactive = PETSC_FALSE; 1897 1898 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1899 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1900 if (oldmat->colmap) { 1901 #if defined 
(PETSC_USE_CTABLE) 1902 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1903 #else 1904 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1905 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1906 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1907 #endif 1908 } else a->colmap = 0; 1909 if (oldmat->garray) { 1910 int len; 1911 len = oldmat->B->n; 1912 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1913 PetscLogObjectMemory(mat,len*sizeof(int)); 1914 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1915 } else a->garray = 0; 1916 1917 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1918 PetscLogObjectParent(mat,a->lvec); 1919 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1920 PetscLogObjectParent(mat,a->Mvctx); 1921 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1922 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1923 PetscLogObjectParent(mat,a->A); 1924 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1925 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1926 PetscLogObjectParent(mat,a->B); 1927 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1928 *newmat = mat; 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #include "petscsys.h" 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatLoad_MPIAIJ" 1936 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1937 { 1938 Mat A; 1939 PetscScalar *vals,*svals; 1940 MPI_Comm comm = ((PetscObject)viewer)->comm; 1941 MPI_Status status; 1942 int i,nz,ierr,j,rstart,rend,fd; 1943 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1944 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1945 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1946 1947 PetscFunctionBegin; 1948 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1949 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1950 if (!rank) { 1951 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1952 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1953 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1954 if (header[3] < 0) { 1955 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1956 } 1957 } 1958 1959 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1960 M = header[1]; N = header[2]; 1961 /* determine ownership of all rows */ 1962 m = M/size + ((M % size) > rank); 1963 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1964 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1965 rowners[0] = 0; 1966 for (i=2; i<=size; i++) { 1967 rowners[i] += rowners[i-1]; 1968 } 1969 rstart = rowners[rank]; 1970 rend = rowners[rank+1]; 1971 1972 /* distribute row lengths to all processors */ 1973 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1974 offlens = ourlens + (rend-rstart); 1975 if (!rank) { 1976 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1977 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1978 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1979 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1980 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1981 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1982 } else { 1983 ierr = 
MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1984 } 1985 1986 if (!rank) { 1987 /* calculate the number of nonzeros on each processor */ 1988 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1989 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1990 for (i=0; i<size; i++) { 1991 for (j=rowners[i]; j< rowners[i+1]; j++) { 1992 procsnz[i] += rowlengths[j]; 1993 } 1994 } 1995 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1996 1997 /* determine max buffer needed and allocate it */ 1998 maxnz = 0; 1999 for (i=0; i<size; i++) { 2000 maxnz = PetscMax(maxnz,procsnz[i]); 2001 } 2002 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2003 2004 /* read in my part of the matrix column indices */ 2005 nz = procsnz[0]; 2006 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2007 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2008 2009 /* read in every one elses and ship off */ 2010 for (i=1; i<size; i++) { 2011 nz = procsnz[i]; 2012 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2013 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2014 } 2015 ierr = PetscFree(cols);CHKERRQ(ierr); 2016 } else { 2017 /* determine buffer space needed for message */ 2018 nz = 0; 2019 for (i=0; i<m; i++) { 2020 nz += ourlens[i]; 2021 } 2022 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2023 2024 /* receive message of column indices*/ 2025 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2026 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2027 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2028 } 2029 2030 /* determine column ownership if matrix is not square */ 2031 if (N != M) { 2032 n = N/size + ((N % size) > rank); 2033 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2034 cstart = cend - n; 2035 } else { 2036 cstart = rstart; 2037 cend = rend; 2038 n = cend - cstart; 2039 } 2040 2041 /* loop over local rows, determining number of off diagonal entries */ 2042 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2043 jj = 0; 2044 for (i=0; i<m; i++) { 2045 for (j=0; j<ourlens[i]; j++) { 2046 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2047 jj++; 2048 } 2049 } 2050 2051 /* create our matrix */ 2052 for (i=0; i<m; i++) { 2053 ourlens[i] -= offlens[i]; 2054 } 2055 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2056 ierr = MatSetType(A,type);CHKERRQ(ierr); 2057 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2058 2059 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2060 for (i=0; i<m; i++) { 2061 ourlens[i] += offlens[i]; 2062 } 2063 2064 if (!rank) { 2065 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2066 2067 /* read in my part of the matrix numerical values */ 2068 nz = procsnz[0]; 2069 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2070 2071 /* insert into matrix */ 2072 jj = rstart; 2073 smycols = mycols; 2074 svals = vals; 2075 for (i=0; i<m; i++) { 2076 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2077 smycols += ourlens[i]; 2078 svals += ourlens[i]; 2079 jj++; 2080 } 2081 2082 /* read in other processors and ship out */ 2083 for (i=1; i<size; i++) { 2084 nz = procsnz[i]; 2085 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2086 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2087 } 2088 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2089 } else { 2090 /* 
receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
  on each process, and then the final result by concatenating the local
  matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat          *local,M,Mreuse;
  PetscScalar  *vwork,*aa;
  MPI_Comm     comm = mat->comm;
  Mat_SeqAIJ   *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
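      /* split the nonzeros of local row i into those that fall in the new
         diagonal block (columns in [rstart,rend)) and those that do not */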
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ
   format (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.
   See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.
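   For instance, the preallocation call on proc1 of the example above would
   be (a sketch, with B the matrix being preallocated and error checking as
   usual):
.vb
      int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
      ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve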
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor, then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.
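   With those arrays, the creation call on every process would be, for
   example (a sketch; m is 3, 3, and 2 on proc0, proc1, and proc2
   respectively, and error checking is omitted):
.vb
      Mat A;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve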
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large PETSc matrix by concatenating the
   sequential matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
-  inmat - the input sequential matrices

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix on EACH of the separate
   processes MUST be the same.
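   Example usage (a sketch; Aloc is a sequential matrix that each process
   is assumed to have already assembled, all with the same number of
   columns):
.vb
      Mat Aloc,Apar;
      ... create and assemble the local SeqAIJ matrix Aloc ...
      ierr = MatMerge(PETSC_COMM_WORLD,Aloc,&Apar);CHKERRQ(ierr);
.ve
   Note that MatMerge() destroys the input matrix inmat.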
@*/
int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
{
  int          ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
  PetscScalar  *values;
  PetscMap     columnmap,rowmap;

  PetscFunctionBegin;

  ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);

  /* count the nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
  ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
  ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
  ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
  ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
  ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
  ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* This routine will ONLY return an MPIAIJ type matrix */
  ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
int MatFileSplit(Mat A,char *outfile)
{
  int          ierr,rank,len,m,N,i,rstart,*indx,nnz;
  PetscViewer  out;
  char         *name;
  Mat          B;
  PetscScalar  *values;

  PetscFunctionBegin;

  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
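     (MATSEQAIJ is hardwired below; using the type of the diagonal block
      would presumably preserve the storage format, e.g. for BAIJ matrices.)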
  */
  ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  /* leave room for the ".<rank>" suffix appended below */
  ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
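/*
   Example usage of MatFileSplit() (a sketch; "Amat" is a hypothetical base
   file name):

      ierr = MatFileSplit(A,"Amat");CHKERRQ(ierr);

   Each process writes its local rows as a sequential matrix to the binary
   file Amat.<rank>, e.g. Amat.0, Amat.1, ...
*/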