1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 int CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 int n = aij->B->n,i,ierr; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 191 int cstart = aij->cstart,cend = aij->cend,row,col; 192 PetscTruth roworiented = 
aij->roworiented; 193 194 /* Some Variables required in the macro */ 195 Mat A = aij->A; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 197 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 198 PetscScalar *aa = a->a; 199 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 200 Mat B = aij->B; 201 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 202 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 203 PetscScalar *ba = b->a; 204 205 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 206 int nonew = a->nonew,shift=0; 207 PetscScalar *ap; 208 209 PetscFunctionBegin; 210 for (i=0; i<m; i++) { 211 if (im[i] < 0) continue; 212 #if defined(PETSC_USE_BOPT_g) 213 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 214 #endif 215 if (im[i] >= rstart && im[i] < rend) { 216 row = im[i] - rstart; 217 for (j=0; j<n; j++) { 218 if (in[j] >= cstart && in[j] < cend){ 219 col = in[j] - cstart; 220 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 221 if (ignorezeroentries && value == 0.0) continue; 222 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 223 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 224 } else if (in[j] < 0) continue; 225 #if defined(PETSC_USE_BOPT_g) 226 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 227 #endif 228 else { 229 if (mat->was_assembled) { 230 if (!aij->colmap) { 231 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 232 } 233 #if defined (PETSC_USE_CTABLE) 234 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 235 col--; 236 #else 237 col = aij->colmap[in[j]] - 1; 238 #endif 239 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 240 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 241 col = in[j]; 242 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 243 B = aij->B; 244 b = (Mat_SeqAIJ*)B->data; 245 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 246 ba = b->a; 247 } 248 } else col = in[j]; 249 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 250 if (ignorezeroentries && value == 0.0) continue; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (!aij->donotstash) { 257 if (roworiented) { 258 if (ignorezeroentries && v[i*n] == 0.0) continue; 259 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 260 } else { 261 if (ignorezeroentries && v[i] == 0.0) continue; 262 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetValues_MPIAIJ" 272 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 275 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 276 int cstart = aij->cstart,cend = aij->cend,row,col; 277 278 PetscFunctionBegin; 279 for (i=0; i<m; i++) { 280 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 281 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 282 if (idxm[i] >= rstart && idxm[i] < rend) { 283 row = idxm[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative 
column: %d",idxn[j]); 286 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 287 if (idxn[j] >= cstart && idxn[j] < cend){ 288 col = idxn[j] - cstart; 289 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 290 } else { 291 if (!aij->colmap) { 292 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 293 } 294 #if defined (PETSC_USE_CTABLE) 295 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 296 col --; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 318 int ierr,nstash,reallocs; 319 InsertMode addv; 320 321 PetscFunctionBegin; 322 if (aij->donotstash) { 323 PetscFunctionReturn(0); 324 } 325 326 /* make sure all processors are either in INSERTMODE or ADDMODE */ 327 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 328 if (addv == (ADD_VALUES|INSERT_VALUES)) { 329 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 330 } 331 mat->insertmode = addv; /* in case this processor had no cache */ 332 333 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 334 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 335 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 336 PetscFunctionReturn(0); 337 } 338 339 340 #undef __FUNCT__ 341 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 343 { 344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 345 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 346 int i,j,rstart,ncols,n,ierr,flg; 347 int *row,*col,other_disassembled; 348 PetscScalar *val; 349 InsertMode addv = mat->insertmode; 350 351 PetscFunctionBegin; 352 if (!aij->donotstash) { 353 while (1) { 354 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 355 if (!flg) break; 356 357 for (i=0; i<n;) { 358 /* Now identify the consecutive vals belonging to the same row */ 359 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 360 if (j < n) ncols = j-i; 361 else ncols = n-i; 362 /* Now assemble all these values with a single function call */ 363 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 364 i = j; 365 } 366 } 367 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 368 } 369 370 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 371 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 372 373 /* determine if any processor has disassembled, if so we must 374 also disassemble ourselfs, in order that we may reassemble. 
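     A process needs to disassemble when MatSetValues_MPIAIJ() is handed an entry for an
     off-diagonal column that is not in its compressed column map; DisAssemble_MPIAIJ()
     then converts B back to global column numbering so the new column can be inserted.
     The MPI_Allreduce below combines the was_assembled flags with MPI_PROD, so if any
     process is no longer assembled, the processes that still are disassemble here too
     before reassembling.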
*/ 375 /* 376 if nonzero structure of submatrix B cannot change then we know that 377 no processor disassembled thus we can skip this stuff 378 */ 379 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 380 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 381 if (mat->was_assembled && !other_disassembled) { 382 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 383 } 384 } 385 386 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 387 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 390 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 391 392 if (aij->rowvalues) { 393 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 394 aij->rowvalues = 0; 395 } 396 397 /* used by MatAXPY() */ 398 a->xtoy = 0; b->xtoy = 0; 399 a->XtoY = 0; b->XtoY = 0; 400 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 422 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 423 int *nprocs,j,idx,nsends,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 PetscTruth found; 432 433 PetscFunctionBegin; 434 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 435 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 436 437 /* first count number of contributors to each processor */ 438 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 439 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 440 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 441 for (i=0; i<N; i++) { 442 idx = rows[i]; 443 found = PETSC_FALSE; 444 for (j=0; j<size; j++) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 450 } 451 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 452 453 /* inform other processors of number of messages and max length*/ 454 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 455 456 /* post receives: */ 457 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 458 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 459 for (i=0; i<nrecvs; i++) { 460 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 461 } 462 463 /* do sends: 464 1) starts[i] gives the starting index in svalues for stuff going to 465 the ith processor 466 */ 467 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 468 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 469 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 470 starts[0] = 0; 471 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 472 for (i=0; i<N; i++) { 473 svalues[starts[owner[i]]++] 
= rows[i]; 474 } 475 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 476 477 starts[0] = 0; 478 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 479 count = 0; 480 for (i=0; i<size; i++) { 481 if (nprocs[2*i+1]) { 482 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 483 } 484 } 485 ierr = PetscFree(starts);CHKERRQ(ierr); 486 487 base = owners[rank]; 488 489 /* wait on receives */ 490 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 491 source = lens + nrecvs; 492 count = nrecvs; slen = 0; 493 while (count) { 494 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 495 /* unpack receives into our local space */ 496 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 497 source[imdex] = recv_status.MPI_SOURCE; 498 lens[imdex] = n; 499 slen += n; 500 count--; 501 } 502 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 503 504 /* move the data into the send scatter */ 505 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 506 count = 0; 507 for (i=0; i<nrecvs; i++) { 508 values = rvalues + i*nmax; 509 for (j=0; j<lens[i]; j++) { 510 lrows[count++] = values[j] - base; 511 } 512 } 513 ierr = PetscFree(rvalues);CHKERRQ(ierr); 514 ierr = PetscFree(lens);CHKERRQ(ierr); 515 ierr = PetscFree(owner);CHKERRQ(ierr); 516 ierr = PetscFree(nprocs);CHKERRQ(ierr); 517 518 /* actually zap the local rows */ 519 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 520 PetscLogObjectParent(A,istmp); 521 522 /* 523 Zero the required rows. If the "diagonal block" of the matrix 524 is square and the user wishes to set the diagonal we use seperate 525 code so that MatSetValues() is not called for each diagonal allocating 526 new memory, thus calling lots of mallocs and slowing things down. 
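     For reference, a hedged caller-side sketch (nrows, grows and A are illustrative
     names, not taken from this file; the rows are global indices and need not be
     locally owned, since the code above routes them to their owners):

         IS          rows;
         PetscScalar one = 1.0;
         ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,grows,&rows);CHKERRQ(ierr);
         ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);   zeroes the rows, puts 1.0 on their diagonals
         ierr = ISDestroy(rows);CHKERRQ(ierr);

     Passing PETSC_NULL as the third argument leaves the zeroed rows with no diagonal entry.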
527 528 Contributed by: Mathew Knepley 529 */ 530 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 531 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 532 if (diag && (l->A->M == l->A->N)) { 533 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 534 } else if (diag) { 535 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 536 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 537 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 538 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 539 } 540 for (i = 0; i < slen; i++) { 541 row = lrows[i] + rstart; 542 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 543 } 544 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 } else { 547 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 548 } 549 ierr = ISDestroy(istmp);CHKERRQ(ierr); 550 ierr = PetscFree(lrows);CHKERRQ(ierr); 551 552 /* wait on sends */ 553 if (nsends) { 554 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 555 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 556 ierr = PetscFree(send_status);CHKERRQ(ierr); 557 } 558 ierr = PetscFree(send_waits);CHKERRQ(ierr); 559 ierr = PetscFree(svalues);CHKERRQ(ierr); 560 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatMult_MPIAIJ" 566 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 567 { 568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 569 int ierr,nt; 570 571 PetscFunctionBegin; 572 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 573 if (nt != A->n) { 574 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 575 } 576 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 577 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 578 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 579 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMultAdd_MPIAIJ" 585 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 588 int ierr; 589 590 PetscFunctionBegin; 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 600 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 /* do nondiagonal part */ 607 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 608 /* send it on its way */ 609 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 610 /* do local part */ 611 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 612 /* receive remote parts: note this assumes the values are not actually */ 613 /* inserted in yy until the next line, which is true for my implementation*/ 614 /* but is not perhaps always true. 
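     That is, correctness only requires that the remote contributions be added into yy by
     VecScatterEnd() after the local product A^T x has been placed in yy just above; a
     VecScatter implementation that already deposited values into yy during
     VecScatterBegin() would have them overwritten by that local product.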
*/ 615 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 EXTERN_C_BEGIN 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 622 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 623 { 624 MPI_Comm comm; 625 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 626 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 627 IS Me,Notme; 628 int M,N,first,last,*notme,ntids,i, ierr; 629 630 PetscFunctionBegin; 631 632 /* Easy test: symmetric diagonal block */ 633 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 634 ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr); 635 if (!*f) PetscFunctionReturn(0); 636 ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr); 637 ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr); 638 if (ntids==1) PetscFunctionReturn(0); 639 640 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 641 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 642 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 643 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 644 for (i=0; i<first; i++) notme[i] = i; 645 for (i=last; i<M; i++) notme[i-last+first] = i; 646 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 647 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 648 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 649 Aoff = Aoffs[0]; 650 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 651 Boff = Boffs[0]; 652 ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr); 653 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 654 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 655 ierr = ISDestroy(Me); CHKERRQ(ierr); 656 ierr = ISDestroy(Notme); CHKERRQ(ierr); 657 658 PetscFunctionReturn(0); 659 } 660 EXTERN_C_END 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 664 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 665 { 666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 667 int ierr; 668 669 PetscFunctionBegin; 670 /* do nondiagonal part */ 671 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 672 /* send it on its way */ 673 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 674 /* do local part */ 675 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 676 /* receive remote parts: note this assumes the values are not actually */ 677 /* inserted in yy until the next line, which is true for my implementation*/ 678 /* but is not perhaps always true. 
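     The same caveat applies here with zz in place of yy: the remote pieces of the
     off-diagonal product are added into zz by VecScatterEnd() below, after the local
     zz = yy + A^T x has been formed, so the scatter must not touch zz before then.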
*/ 679 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 /* 684 This only works correctly for square matrices where the subblock A->A is the 685 diagonal block 686 */ 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 689 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 690 { 691 int ierr; 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 694 PetscFunctionBegin; 695 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 696 if (a->rstart != a->cstart || a->rend != a->cend) { 697 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 698 } 699 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatScale_MPIAIJ" 705 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 706 { 707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 708 int ierr; 709 710 PetscFunctionBegin; 711 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 712 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatDestroy_MPIAIJ" 718 int MatDestroy_MPIAIJ(Mat mat) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 int ierr; 722 723 PetscFunctionBegin; 724 #if defined(PETSC_USE_LOG) 725 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 726 #endif 727 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 728 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 729 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 730 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 731 #if defined (PETSC_USE_CTABLE) 732 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 733 #else 734 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 735 #endif 736 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 737 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 738 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 739 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 740 ierr = PetscFree(aij);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MPIAIJ_Binary" 746 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 747 { 748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 749 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 750 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 751 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 752 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 753 PetscScalar *column_values; 754 755 PetscFunctionBegin; 756 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 757 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 758 nz = A->nz + B->nz; 759 if (rank == 0) { 760 header[0] = MAT_FILE_COOKIE; 761 header[1] = mat->M; 762 header[2] = mat->N; 763 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 764 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 765 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 766 /* get largest number of rows any processor has */ 767 rlen = mat->m; 768 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 769 for (i=1; i<size; i++) { 770 rlen = PetscMax(rlen,range[i+1] - range[i]); 771 } 772 } else { 773 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 774 rlen = mat->m; 775 } 776 777 /* load up the local row counts */ 778 ierr = 
PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 779 for (i=0; i<mat->m; i++) { 780 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 781 } 782 783 /* store the row lengths to the file */ 784 if (rank == 0) { 785 MPI_Status status; 786 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 787 for (i=1; i<size; i++) { 788 rlen = range[i+1] - range[i]; 789 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 790 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 791 } 792 } else { 793 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 794 } 795 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 796 797 /* load up the local column indices */ 798 nzmax = nz; /* )th processor needs space a largest processor needs */ 799 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 800 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 801 cnt = 0; 802 for (i=0; i<mat->m; i++) { 803 for (j=B->i[i]; j<B->i[i+1]; j++) { 804 if ( (col = garray[B->j[j]]) > cstart) break; 805 column_indices[cnt++] = col; 806 } 807 for (k=A->i[i]; k<A->i[i+1]; k++) { 808 column_indices[cnt++] = A->j[k] + cstart; 809 } 810 for (; j<B->i[i+1]; j++) { 811 column_indices[cnt++] = garray[B->j[j]]; 812 } 813 } 814 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 815 816 /* store the column indices to the file */ 817 if (rank == 0) { 818 MPI_Status status; 819 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 820 for (i=1; i<size; i++) { 821 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 823 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 824 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 825 } 826 } else { 827 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(column_indices);CHKERRQ(ierr); 831 832 /* load up the local column values */ 833 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 834 cnt = 0; 835 for (i=0; i<mat->m; i++) { 836 for (j=B->i[i]; j<B->i[i+1]; j++) { 837 if ( garray[B->j[j]] > cstart) break; 838 column_values[cnt++] = B->a[j]; 839 } 840 for (k=A->i[i]; k<A->i[i+1]; k++) { 841 column_values[cnt++] = A->a[k]; 842 } 843 for (; j<B->i[i+1]; j++) { 844 column_values[cnt++] = B->a[j]; 845 } 846 } 847 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 848 849 /* store the column values to the file */ 850 if (rank == 0) { 851 MPI_Status status; 852 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 853 for (i=1; i<size; i++) { 854 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 856 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 858 } 859 } else { 860 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 861 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(column_values);CHKERRQ(ierr); 864 
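  /*
     At this point the file contains, in order: the four-integer header (cookie, global
     rows M, global columns N, global number of nonzeros), the lengths of all global rows,
     the column indices of every row in increasing global order (off-diagonal entries
     below the diagonal block, then the diagonal-block entries shifted by cstart, then the
     remaining off-diagonal entries), and finally the numerical values in the same order;
     this is the layout that MatLoad() reads back.
  */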
PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 869 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 872 int ierr,rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,isascii,flg,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (isascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 886 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 887 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 888 if (flg) { 889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 890 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 891 } else { 892 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 893 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 894 } 895 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 896 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 897 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 899 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 900 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } else if (format == PETSC_VIEWER_ASCII_INFO) { 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 905 PetscFunctionReturn(0); 906 } 907 } else if (isbinary) { 908 if (size == 1) { 909 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 910 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 911 } else { 912 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } else if (isdraw) { 916 PetscDraw draw; 917 PetscTruth isnull; 918 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 919 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 920 } 921 922 if (size == 1) { 923 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 924 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 925 } else { 926 /* assemble the entire matrix onto first processor. 
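       This is done by creating a temporary parallel MATMPIAIJ matrix whose rows all live
       on process 0, copying the local diagonal (A) and off-diagonal (B) parts into it
       with MatSetValues() in global column numbering, and then letting process 0 view
       the underlying sequential matrix through the singleton viewer; the temporary
       matrix is destroyed afterwards.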
*/ 927 Mat A; 928 Mat_SeqAIJ *Aloc; 929 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 930 PetscScalar *a; 931 932 if (!rank) { 933 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 934 } else { 935 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 936 } 937 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 938 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 939 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 940 PetscLogObjectParent(mat,A); 941 942 /* copy over the A part */ 943 Aloc = (Mat_SeqAIJ*)aij->A->data; 944 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 945 row = aij->rstart; 946 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 947 for (i=0; i<m; i++) { 948 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 949 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 950 } 951 aj = Aloc->j; 952 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 953 954 /* copy over the B part */ 955 Aloc = (Mat_SeqAIJ*)aij->B->data; 956 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 957 row = aij->rstart; 958 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 959 ct = cols; 960 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 961 for (i=0; i<m; i++) { 962 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 963 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 964 } 965 ierr = PetscFree(ct);CHKERRQ(ierr); 966 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968 /* 969 Everyone has to call to draw the matrix since the graphics waits are 970 synchronized across all processors that share the PetscDraw object 971 */ 972 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 973 if (!rank) { 974 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 975 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 976 } 977 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 978 ierr = MatDestroy(A);CHKERRQ(ierr); 979 } 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatView_MPIAIJ" 985 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 986 { 987 int ierr; 988 PetscTruth isascii,isdraw,issocket,isbinary; 989 990 PetscFunctionBegin; 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 992 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 993 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 994 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 995 if (isascii || isdraw || isbinary || issocket) { 996 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 997 } else { 998 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatRelax_MPIAIJ" 1007 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1008 { 1009 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1010 int ierr; 1011 Vec bb1; 1012 PetscScalar mone=-1.0; 1013 1014 PetscFunctionBegin; 1015 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1016 1017 ierr = 
VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1018 1019 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1020 if (flag & SOR_ZERO_INITIAL_GUESS) { 1021 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1022 its--; 1023 } 1024 1025 while (its--) { 1026 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1027 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1028 1029 /* update rhs: bb1 = bb - B*x */ 1030 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1031 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1032 1033 /* local sweep */ 1034 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1035 CHKERRQ(ierr); 1036 } 1037 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1038 if (flag & SOR_ZERO_INITIAL_GUESS) { 1039 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1040 its--; 1041 } 1042 while (its--) { 1043 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1044 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1045 1046 /* update rhs: bb1 = bb - B*x */ 1047 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1048 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1049 1050 /* local sweep */ 1051 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1052 CHKERRQ(ierr); 1053 } 1054 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1055 if (flag & SOR_ZERO_INITIAL_GUESS) { 1056 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1057 its--; 1058 } 1059 while (its--) { 1060 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1061 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1062 1063 /* update rhs: bb1 = bb - B*x */ 1064 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1065 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1066 1067 /* local sweep */ 1068 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1069 CHKERRQ(ierr); 1070 } 1071 } else { 1072 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1073 } 1074 1075 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1082 { 1083 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1084 Mat A = mat->A,B = mat->B; 1085 int ierr; 1086 PetscReal isend[5],irecv[5]; 1087 1088 PetscFunctionBegin; 1089 info->block_size = 1.0; 1090 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1091 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1092 isend[3] = info->memory; isend[4] = info->mallocs; 1093 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1094 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1095 isend[3] += info->memory; isend[4] += info->mallocs; 1096 if (flag == MAT_LOCAL) { 1097 info->nz_used = isend[0]; 1098 info->nz_allocated = isend[1]; 1099 info->nz_unneeded = isend[2]; 1100 info->memory = isend[3]; 1101 info->mallocs = isend[4]; 1102 } else if (flag == MAT_GLOBAL_MAX) { 1103 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1104 info->nz_used 
= irecv[0]; 1105 info->nz_allocated = irecv[1]; 1106 info->nz_unneeded = irecv[2]; 1107 info->memory = irecv[3]; 1108 info->mallocs = irecv[4]; 1109 } else if (flag == MAT_GLOBAL_SUM) { 1110 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1111 info->nz_used = irecv[0]; 1112 info->nz_allocated = irecv[1]; 1113 info->nz_unneeded = irecv[2]; 1114 info->memory = irecv[3]; 1115 info->mallocs = irecv[4]; 1116 } 1117 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1118 info->fill_ratio_needed = 0; 1119 info->factor_mallocs = 0; 1120 info->rows_global = (double)matin->M; 1121 info->columns_global = (double)matin->N; 1122 info->rows_local = (double)matin->m; 1123 info->columns_local = (double)matin->N; 1124 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatSetOption_MPIAIJ" 1130 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 int ierr; 1134 1135 PetscFunctionBegin; 1136 switch (op) { 1137 case MAT_NO_NEW_NONZERO_LOCATIONS: 1138 case MAT_YES_NEW_NONZERO_LOCATIONS: 1139 case MAT_COLUMNS_UNSORTED: 1140 case MAT_COLUMNS_SORTED: 1141 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1142 case MAT_KEEP_ZEROED_ROWS: 1143 case MAT_NEW_NONZERO_LOCATION_ERR: 1144 case MAT_USE_INODES: 1145 case MAT_DO_NOT_USE_INODES: 1146 case MAT_IGNORE_ZERO_ENTRIES: 1147 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1148 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1149 break; 1150 case MAT_ROW_ORIENTED: 1151 a->roworiented = PETSC_TRUE; 1152 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1153 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1154 break; 1155 case MAT_ROWS_SORTED: 1156 case MAT_ROWS_UNSORTED: 1157 case MAT_YES_NEW_DIAGONALS: 1158 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1159 break; 1160 case MAT_COLUMN_ORIENTED: 1161 a->roworiented = PETSC_FALSE; 1162 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1163 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1164 break; 1165 case MAT_IGNORE_OFF_PROC_ENTRIES: 1166 a->donotstash = PETSC_TRUE; 1167 break; 1168 case MAT_NO_NEW_DIAGONALS: 1169 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1170 case MAT_SYMMETRIC: 1171 case MAT_STRUCTURALLY_SYMMETRIC: 1172 case MAT_NOT_SYMMETRIC: 1173 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1174 case MAT_HERMITIAN: 1175 case MAT_NOT_HERMITIAN: 1176 case MAT_SYMMETRY_ETERNAL: 1177 case MAT_NOT_SYMMETRY_ETERNAL: 1178 break; 1179 default: 1180 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatGetRow_MPIAIJ" 1187 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1188 { 1189 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1190 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1191 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1192 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1193 int *cmap,*idx_p; 1194 1195 PetscFunctionBegin; 1196 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1197 mat->getrowactive = PETSC_TRUE; 1198 1199 if (!mat->rowvalues && (idx || v)) { 1200 /* 1201 allocate enough space to hold information from the longest row. 
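       The scratch arrays rowvalues/rowindices are sized by the largest combined row
       length of the diagonal (A) and off-diagonal (B) parts, allocated once here, reused
       by every later MatGetRow() call, and released again in MatAssemblyEnd_MPIAIJ() and
       MatDestroy_MPIAIJ().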
1202 */ 1203 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1204 int max = 1,tmp; 1205 for (i=0; i<matin->m; i++) { 1206 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1207 if (max < tmp) { max = tmp; } 1208 } 1209 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1210 mat->rowindices = (int*)(mat->rowvalues + max); 1211 } 1212 1213 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1214 lrow = row - rstart; 1215 1216 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1217 if (!v) {pvA = 0; pvB = 0;} 1218 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1219 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1220 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1221 nztot = nzA + nzB; 1222 1223 cmap = mat->garray; 1224 if (v || idx) { 1225 if (nztot) { 1226 /* Sort by increasing column numbers, assuming A and B already sorted */ 1227 int imark = -1; 1228 if (v) { 1229 *v = v_p = mat->rowvalues; 1230 for (i=0; i<nzB; i++) { 1231 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1232 else break; 1233 } 1234 imark = i; 1235 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1236 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1237 } 1238 if (idx) { 1239 *idx = idx_p = mat->rowindices; 1240 if (imark > -1) { 1241 for (i=0; i<imark; i++) { 1242 idx_p[i] = cmap[cworkB[i]]; 1243 } 1244 } else { 1245 for (i=0; i<nzB; i++) { 1246 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1247 else break; 1248 } 1249 imark = i; 1250 } 1251 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1252 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1253 } 1254 } else { 1255 if (idx) *idx = 0; 1256 if (v) *v = 0; 1257 } 1258 } 1259 *nz = nztot; 1260 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1261 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1267 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1268 { 1269 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1270 1271 PetscFunctionBegin; 1272 if (aij->getrowactive == PETSC_FALSE) { 1273 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1274 } 1275 aij->getrowactive = PETSC_FALSE; 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatNorm_MPIAIJ" 1281 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1282 { 1283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1284 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1285 int ierr,i,j,cstart = aij->cstart; 1286 PetscReal sum = 0.0; 1287 PetscScalar *v; 1288 1289 PetscFunctionBegin; 1290 if (aij->size == 1) { 1291 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1292 } else { 1293 if (type == NORM_FROBENIUS) { 1294 v = amat->a; 1295 for (i=0; i<amat->nz; i++) { 1296 #if defined(PETSC_USE_COMPLEX) 1297 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1298 #else 1299 sum += (*v)*(*v); v++; 1300 #endif 1301 } 1302 v = bmat->a; 1303 for (i=0; i<bmat->nz; i++) { 1304 #if defined(PETSC_USE_COMPLEX) 1305 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1306 #else 1307 sum += (*v)*(*v); v++; 1308 #endif 1309 } 1310 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1311 *norm = sqrt(*norm); 1312 } else if (type == NORM_1) { /* max column norm */ 1313 PetscReal *tmp,*tmp2; 1314 int 
*jj,*garray = aij->garray; 1315 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1316 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1317 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1318 *norm = 0.0; 1319 v = amat->a; jj = amat->j; 1320 for (j=0; j<amat->nz; j++) { 1321 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1322 } 1323 v = bmat->a; jj = bmat->j; 1324 for (j=0; j<bmat->nz; j++) { 1325 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1326 } 1327 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1328 for (j=0; j<mat->N; j++) { 1329 if (tmp2[j] > *norm) *norm = tmp2[j]; 1330 } 1331 ierr = PetscFree(tmp);CHKERRQ(ierr); 1332 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1333 } else if (type == NORM_INFINITY) { /* max row norm */ 1334 PetscReal ntemp = 0.0; 1335 for (j=0; j<aij->A->m; j++) { 1336 v = amat->a + amat->i[j]; 1337 sum = 0.0; 1338 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1339 sum += PetscAbsScalar(*v); v++; 1340 } 1341 v = bmat->a + bmat->i[j]; 1342 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1343 sum += PetscAbsScalar(*v); v++; 1344 } 1345 if (sum > ntemp) ntemp = sum; 1346 } 1347 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1348 } else { 1349 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1350 } 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatTranspose_MPIAIJ" 1357 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1358 { 1359 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1360 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1361 int ierr; 1362 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1363 Mat B; 1364 PetscScalar *array; 1365 1366 PetscFunctionBegin; 1367 if (!matout && M != N) { 1368 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1369 } 1370 1371 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1372 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1373 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1374 1375 /* copy over the A part */ 1376 Aloc = (Mat_SeqAIJ*)a->A->data; 1377 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1378 row = a->rstart; 1379 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1380 for (i=0; i<m; i++) { 1381 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1382 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1383 } 1384 aj = Aloc->j; 1385 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1386 1387 /* copy over the B part */ 1388 Aloc = (Mat_SeqAIJ*)a->B->data; 1389 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1390 row = a->rstart; 1391 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1392 ct = cols; 1393 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1394 for (i=0; i<m; i++) { 1395 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1396 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1397 } 1398 ierr = PetscFree(ct);CHKERRQ(ierr); 1399 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1400 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1401 if (matout) { 1402 *matout = B; 1403 } else { 1404 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1411 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1412 { 1413 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1414 Mat a = aij->A,b = aij->B; 1415 int 
ierr,s1,s2,s3; 1416 1417 PetscFunctionBegin; 1418 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1419 if (rr) { 1420 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1421 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1422 /* Overlap communication with computation. */ 1423 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1424 } 1425 if (ll) { 1426 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1427 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1428 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1429 } 1430 /* scale the diagonal block */ 1431 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1432 1433 if (rr) { 1434 /* Do a scatter end and then right scale the off-diagonal block */ 1435 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1436 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1437 } 1438 1439 PetscFunctionReturn(0); 1440 } 1441 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1445 int MatPrintHelp_MPIAIJ(Mat A) 1446 { 1447 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1448 int ierr; 1449 1450 PetscFunctionBegin; 1451 if (!a->rank) { 1452 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1459 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1460 { 1461 PetscFunctionBegin; 1462 *bs = 1; 1463 PetscFunctionReturn(0); 1464 } 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1467 int MatSetUnfactored_MPIAIJ(Mat A) 1468 { 1469 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1470 int ierr; 1471 1472 PetscFunctionBegin; 1473 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatEqual_MPIAIJ" 1479 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1480 { 1481 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1482 Mat a,b,c,d; 1483 PetscTruth flg; 1484 int ierr; 1485 1486 PetscFunctionBegin; 1487 a = matA->A; b = matA->B; 1488 c = matB->A; d = matB->B; 1489 1490 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1491 if (flg == PETSC_TRUE) { 1492 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1493 } 1494 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1495 PetscFunctionReturn(0); 1496 } 1497 1498 #undef __FUNCT__ 1499 #define __FUNCT__ "MatCopy_MPIAIJ" 1500 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1501 { 1502 int ierr; 1503 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1504 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1505 1506 PetscFunctionBegin; 1507 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1508 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1509 /* because of the column compression in the off-processor part of the matrix a->B, 1510 the number of columns in a->B and b->B may be different, hence we cannot call 1511 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1512 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1513 then copying the submatrices */ 1514 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1517 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1518 } 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1524 int MatSetUpPreallocation_MPIAIJ(Mat A) 1525 { 1526 int ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #include "petscblaslapack.h" 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatAXPY_MPIAIJ" 1536 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1537 { 1538 int ierr,one=1,i; 1539 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1540 Mat_SeqAIJ *x,*y; 1541 1542 PetscFunctionBegin; 1543 if (str == SAME_NONZERO_PATTERN) { 1544 x = (Mat_SeqAIJ *)xx->A->data; 1545 y = (Mat_SeqAIJ *)yy->A->data; 1546 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1547 x = (Mat_SeqAIJ *)xx->B->data; 1548 y = (Mat_SeqAIJ *)yy->B->data; 1549 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1550 } else if (str == SUBSET_NONZERO_PATTERN) { 1551 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1552 1553 x = (Mat_SeqAIJ *)xx->B->data; 1554 y = (Mat_SeqAIJ *)yy->B->data; 1555 if (y->xtoy && y->XtoY != xx->B) { 1556 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1557 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1558 } 1559 if (!y->xtoy) { /* get xtoy */ 1560 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1561 y->XtoY = xx->B; 1562 } 1563 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1564 } else { 1565 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 /* -------------------------------------------------------------------*/ 1571 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1572 MatGetRow_MPIAIJ, 1573 MatRestoreRow_MPIAIJ, 1574 MatMult_MPIAIJ, 1575 /* 4*/ MatMultAdd_MPIAIJ, 1576 MatMultTranspose_MPIAIJ, 1577 MatMultTransposeAdd_MPIAIJ, 1578 0, 1579 0, 1580 0, 1581 /*10*/ 0, 1582 0, 1583 0, 1584 MatRelax_MPIAIJ, 1585 MatTranspose_MPIAIJ, 1586 /*15*/ MatGetInfo_MPIAIJ, 1587 MatEqual_MPIAIJ, 1588 MatGetDiagonal_MPIAIJ, 1589 MatDiagonalScale_MPIAIJ, 1590 MatNorm_MPIAIJ, 1591 /*20*/ MatAssemblyBegin_MPIAIJ, 1592 MatAssemblyEnd_MPIAIJ, 1593 0, 1594 MatSetOption_MPIAIJ, 1595 MatZeroEntries_MPIAIJ, 1596 /*25*/ MatZeroRows_MPIAIJ, 1597 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1598 MatLUFactorSymbolic_MPIAIJ_TFS, 1599 #else 1600 0, 1601 #endif 1602 0, 1603 0, 1604 0, 1605 /*30*/ MatSetUpPreallocation_MPIAIJ, 1606 0, 1607 0, 1608 0, 1609 0, 1610 /*35*/ MatDuplicate_MPIAIJ, 1611 0, 1612 0, 1613 0, 1614 0, 1615 /*40*/ MatAXPY_MPIAIJ, 1616 MatGetSubMatrices_MPIAIJ, 1617 MatIncreaseOverlap_MPIAIJ, 1618 MatGetValues_MPIAIJ, 1619 MatCopy_MPIAIJ, 1620 /*45*/ MatPrintHelp_MPIAIJ, 1621 MatScale_MPIAIJ, 1622 0, 1623 0, 1624 0, 1625 /*50*/ MatGetBlockSize_MPIAIJ, 1626 0, 1627 0, 1628 0, 1629 0, 1630 /*55*/ MatFDColoringCreate_MPIAIJ, 1631 0, 1632 MatSetUnfactored_MPIAIJ, 1633 0, 1634 0, 1635 /*60*/ MatGetSubMatrix_MPIAIJ, 1636 MatDestroy_MPIAIJ, 1637 MatView_MPIAIJ, 1638 MatGetPetscMaps_Petsc, 1639 0, 1640 /*65*/ 0, 1641 0, 1642 0, 1643 0, 1644 0, 1645 /*70*/ 0, 1646 0, 1647 
MatSetColoring_MPIAIJ, 1648 MatSetValuesAdic_MPIAIJ, 1649 MatSetValuesAdifor_MPIAIJ, 1650 /*75*/ 0, 1651 0, 1652 0, 1653 0, 1654 0, 1655 /*80*/ 0, 1656 0, 1657 0, 1658 0, 1659 /*85*/ MatLoad_MPIAIJ}; 1660 1661 /* ----------------------------------------------------------------------------------------*/ 1662 1663 EXTERN_C_BEGIN 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1666 int MatStoreValues_MPIAIJ(Mat mat) 1667 { 1668 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1669 int ierr; 1670 1671 PetscFunctionBegin; 1672 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1673 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1674 PetscFunctionReturn(0); 1675 } 1676 EXTERN_C_END 1677 1678 EXTERN_C_BEGIN 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1681 int MatRetrieveValues_MPIAIJ(Mat mat) 1682 { 1683 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1684 int ierr; 1685 1686 PetscFunctionBegin; 1687 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1688 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1689 PetscFunctionReturn(0); 1690 } 1691 EXTERN_C_END 1692 1693 #include "petscpc.h" 1694 EXTERN_C_BEGIN 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1697 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1698 { 1699 Mat_MPIAIJ *b; 1700 int ierr,i; 1701 1702 PetscFunctionBegin; 1703 B->preallocated = PETSC_TRUE; 1704 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1705 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1706 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1707 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1708 if (d_nnz) { 1709 for (i=0; i<B->m; i++) { 1710 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1711 } 1712 } 1713 if (o_nnz) { 1714 for (i=0; i<B->m; i++) { 1715 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1716 } 1717 } 1718 b = (Mat_MPIAIJ*)B->data; 1719 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1720 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1721 1722 PetscFunctionReturn(0); 1723 } 1724 EXTERN_C_END 1725 1726 /*MC 1727 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1728 1729 Options Database Keys: 1730 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1731 1732 Level: beginner 1733 1734 .seealso: MatCreateMPIAIJ 1735 M*/ 1736 1737 EXTERN_C_BEGIN 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatCreate_MPIAIJ" 1740 int MatCreate_MPIAIJ(Mat B) 1741 { 1742 Mat_MPIAIJ *b; 1743 int ierr,i,size; 1744 1745 PetscFunctionBegin; 1746 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1747 1748 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1749 B->data = (void*)b; 1750 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1751 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1752 B->factor = 0; 1753 B->assembled = PETSC_FALSE; 1754 B->mapping = 0; 1755 1756 B->insertmode = NOT_SET_VALUES; 1757 b->size = size; 1758 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1759 1760 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1761 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1762 1763 /* the information in the maps duplicates the information computed below, eventually 1764 we should remove the duplicate information that is not contained in the maps */ 1765 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1766 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1767 1768 /* build local table of row and column ownerships */ 1769 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1770 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1771 b->cowners = b->rowners + b->size + 2; 1772 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1773 b->rowners[0] = 0; 1774 for (i=2; i<=b->size; i++) { 1775 b->rowners[i] += b->rowners[i-1]; 1776 } 1777 b->rstart = b->rowners[b->rank]; 1778 b->rend = b->rowners[b->rank+1]; 1779 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1780 b->cowners[0] = 0; 1781 for (i=2; i<=b->size; i++) { 1782 b->cowners[i] += b->cowners[i-1]; 1783 } 1784 b->cstart = b->cowners[b->rank]; 1785 b->cend = b->cowners[b->rank+1]; 1786 1787 /* build cache for off array entries formed */ 1788 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1789 b->donotstash = PETSC_FALSE; 1790 b->colmap = 0; 1791 b->garray = 0; 1792 b->roworiented = PETSC_TRUE; 1793 1794 /* stuff used for matrix vector multiply */ 1795 b->lvec = PETSC_NULL; 1796 b->Mvctx = PETSC_NULL; 1797 1798 /* stuff for MatGetRow() */ 1799 b->rowindices = 0; 1800 b->rowvalues = 0; 1801 b->getrowactive = PETSC_FALSE; 1802 1803 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 1804 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1805 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1806 PetscLogObjectParent(B,b->A); 1807 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1808 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1809 PetscLogObjectParent(B,b->B); 1810 1811 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1812 "MatStoreValues_MPIAIJ", 1813 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1814 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1815 "MatRetrieveValues_MPIAIJ", 1816 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1817 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1818 "MatGetDiagonalBlock_MPIAIJ", 1819 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1820 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1821 "MatIsTranspose_MPIAIJ", 1822 MatIsTranspose_MPIAIJ); CHKERRQ(ierr); 1823 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1824 "MatMPIAIJSetPreallocation_MPIAIJ", 1825 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1826 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1827 "MatDiagonalScaleLocal_MPIAIJ", 1828 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1829 PetscFunctionReturn(0); 1830 } 1831 EXTERN_C_END 1832 1833 /*MC 1834 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1835 1836 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1837 and MATMPIAIJ otherwise. 1838 1839 Options Database Keys: 1840 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1841 1842 Level: beginner 1843 1844 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1845 M*/ 1846 1847 EXTERN_C_BEGIN 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatCreate_AIJ" 1850 int MatCreate_AIJ(Mat A) { 1851 int ierr,size; 1852 1853 PetscFunctionBegin; 1854 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1855 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1856 if (size == 1) { 1857 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1858 } else { 1859 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 EXTERN_C_END 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1867 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1868 { 1869 Mat mat; 1870 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1871 int ierr; 1872 1873 PetscFunctionBegin; 1874 *newmat = 0; 1875 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1876 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1877 a = (Mat_MPIAIJ*)mat->data; 1878 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1879 mat->factor = matin->factor; 1880 mat->assembled = PETSC_TRUE; 1881 mat->insertmode = NOT_SET_VALUES; 1882 mat->preallocated = PETSC_TRUE; 1883 1884 a->rstart = oldmat->rstart; 1885 a->rend = oldmat->rend; 1886 a->cstart = oldmat->cstart; 1887 a->cend = oldmat->cend; 1888 a->size = oldmat->size; 1889 a->rank = oldmat->rank; 1890 a->donotstash = oldmat->donotstash; 1891 a->roworiented = oldmat->roworiented; 1892 a->rowindices = 0; 1893 a->rowvalues = 0; 1894 a->getrowactive = PETSC_FALSE; 1895 1896 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1897 ierr = 
MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1898 if (oldmat->colmap) { 1899 #if defined (PETSC_USE_CTABLE) 1900 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1901 #else 1902 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1903 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1904 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1905 #endif 1906 } else a->colmap = 0; 1907 if (oldmat->garray) { 1908 int len; 1909 len = oldmat->B->n; 1910 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1911 PetscLogObjectMemory(mat,len*sizeof(int)); 1912 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1913 } else a->garray = 0; 1914 1915 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1916 PetscLogObjectParent(mat,a->lvec); 1917 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1918 PetscLogObjectParent(mat,a->Mvctx); 1919 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1920 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1921 PetscLogObjectParent(mat,a->A); 1922 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1923 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1924 PetscLogObjectParent(mat,a->B); 1925 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1926 *newmat = mat; 1927 PetscFunctionReturn(0); 1928 } 1929 1930 #include "petscsys.h" 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatLoad_MPIAIJ" 1934 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1935 { 1936 Mat A; 1937 PetscScalar *vals,*svals; 1938 MPI_Comm comm = ((PetscObject)viewer)->comm; 1939 MPI_Status status; 1940 int i,nz,ierr,j,rstart,rend,fd; 1941 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1942 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1943 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1944 1945 PetscFunctionBegin; 1946 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1947 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1948 if (!rank) { 1949 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1950 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1951 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1952 if (header[3] < 0) { 1953 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1954 } 1955 } 1956 1957 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1958 M = header[1]; N = header[2]; 1959 /* determine ownership of all rows */ 1960 m = M/size + ((M % size) > rank); 1961 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1962 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1963 rowners[0] = 0; 1964 for (i=2; i<=size; i++) { 1965 rowners[i] += rowners[i-1]; 1966 } 1967 rstart = rowners[rank]; 1968 rend = rowners[rank+1]; 1969 1970 /* distribute row lengths to all processors */ 1971 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1972 offlens = ourlens + (rend-rstart); 1973 if (!rank) { 1974 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1975 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1976 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1977 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1978 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1979 
ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1980 } else { 1981 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1982 } 1983 1984 if (!rank) { 1985 /* calculate the number of nonzeros on each processor */ 1986 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1987 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1988 for (i=0; i<size; i++) { 1989 for (j=rowners[i]; j< rowners[i+1]; j++) { 1990 procsnz[i] += rowlengths[j]; 1991 } 1992 } 1993 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1994 1995 /* determine max buffer needed and allocate it */ 1996 maxnz = 0; 1997 for (i=0; i<size; i++) { 1998 maxnz = PetscMax(maxnz,procsnz[i]); 1999 } 2000 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2001 2002 /* read in my part of the matrix column indices */ 2003 nz = procsnz[0]; 2004 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2005 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2006 2007 /* read in every one elses and ship off */ 2008 for (i=1; i<size; i++) { 2009 nz = procsnz[i]; 2010 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2011 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2012 } 2013 ierr = PetscFree(cols);CHKERRQ(ierr); 2014 } else { 2015 /* determine buffer space needed for message */ 2016 nz = 0; 2017 for (i=0; i<m; i++) { 2018 nz += ourlens[i]; 2019 } 2020 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2021 2022 /* receive message of column indices*/ 2023 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2024 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2025 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2026 } 2027 2028 /* determine column ownership if matrix is not square */ 2029 if (N != M) { 2030 n = N/size + ((N % size) > rank); 2031 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2032 cstart = cend - n; 2033 } else { 2034 cstart = rstart; 2035 cend = rend; 2036 n = cend - cstart; 2037 } 2038 2039 /* loop over local rows, determining number of off diagonal entries */ 2040 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2041 jj = 0; 2042 for (i=0; i<m; i++) { 2043 for (j=0; j<ourlens[i]; j++) { 2044 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2045 jj++; 2046 } 2047 } 2048 2049 /* create our matrix */ 2050 for (i=0; i<m; i++) { 2051 ourlens[i] -= offlens[i]; 2052 } 2053 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2054 ierr = MatSetType(A,type);CHKERRQ(ierr); 2055 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2056 2057 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2058 for (i=0; i<m; i++) { 2059 ourlens[i] += offlens[i]; 2060 } 2061 2062 if (!rank) { 2063 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2064 2065 /* read in my part of the matrix numerical values */ 2066 nz = procsnz[0]; 2067 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2068 2069 /* insert into matrix */ 2070 jj = rstart; 2071 smycols = mycols; 2072 svals = vals; 2073 for (i=0; i<m; i++) { 2074 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2075 smycols += ourlens[i]; 2076 svals += ourlens[i]; 2077 jj++; 2078 } 2079 2080 /* read in other processors and ship out */ 2081 for (i=1; i<size; i++) { 2082 nz = procsnz[i]; 2083 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2084 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2085 } 
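    /* all remote values have now been shipped, so the per-process nonzero counts are no longer needed */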
2086 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2087 } else { 2088 /* receive numeric values */ 2089 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2090 2091 /* receive message of values*/ 2092 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2093 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2094 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2095 2096 /* insert into matrix */ 2097 jj = rstart; 2098 smycols = mycols; 2099 svals = vals; 2100 for (i=0; i<m; i++) { 2101 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2102 smycols += ourlens[i]; 2103 svals += ourlens[i]; 2104 jj++; 2105 } 2106 } 2107 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2108 ierr = PetscFree(vals);CHKERRQ(ierr); 2109 ierr = PetscFree(mycols);CHKERRQ(ierr); 2110 ierr = PetscFree(rowners);CHKERRQ(ierr); 2111 2112 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2113 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 *newmat = A; 2115 PetscFunctionReturn(0); 2116 } 2117 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2120 /* 2121 Not great since it makes two copies of the submatrix, first an SeqAIJ 2122 in local and then by concatenating the local matrices the end result. 2123 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2124 */ 2125 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2126 { 2127 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2128 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2129 Mat *local,M,Mreuse; 2130 PetscScalar *vwork,*aa; 2131 MPI_Comm comm = mat->comm; 2132 Mat_SeqAIJ *aij; 2133 2134 2135 PetscFunctionBegin; 2136 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2137 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2138 2139 if (call == MAT_REUSE_MATRIX) { 2140 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2141 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2142 local = &Mreuse; 2143 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2144 } else { 2145 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2146 Mreuse = *local; 2147 ierr = PetscFree(local);CHKERRQ(ierr); 2148 } 2149 2150 /* 2151 m - number of local rows 2152 n - number of columns (same on all processors) 2153 rstart - first row in new global matrix generated 2154 */ 2155 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2156 if (call == MAT_INITIAL_MATRIX) { 2157 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2158 ii = aij->i; 2159 jj = aij->j; 2160 2161 /* 2162 Determine the number of non-zeros in the diagonal and off-diagonal 2163 portions of the matrix in order to do correct preallocation 2164 */ 2165 2166 /* first get start and end of "diagonal" columns */ 2167 if (csize == PETSC_DECIDE) { 2168 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2169 if (mglobal == n) { /* square matrix */ 2170 nlocal = m; 2171 } else { 2172 nlocal = n/size + ((n % size) > rank); 2173 } 2174 } else { 2175 nlocal = csize; 2176 } 2177 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2178 rstart = rend - nlocal; 2179 if (rank == size - 1 && rend != n) { 2180 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2181 } 2182 2183 /* next, compute all the lengths */ 2184 ierr = 
PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2185 olens = dlens + m; 2186 for (i=0; i<m; i++) { 2187 jend = ii[i+1] - ii[i]; 2188 olen = 0; 2189 dlen = 0; 2190 for (j=0; j<jend; j++) { 2191 if (*jj < rstart || *jj >= rend) olen++; 2192 else dlen++; 2193 jj++; 2194 } 2195 olens[i] = olen; 2196 dlens[i] = dlen; 2197 } 2198 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2199 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2200 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2201 ierr = PetscFree(dlens);CHKERRQ(ierr); 2202 } else { 2203 int ml,nl; 2204 2205 M = *newmat; 2206 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2207 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2208 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2209 /* 2210 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2211 rather than the slower MatSetValues(). 2212 */ 2213 M->was_assembled = PETSC_TRUE; 2214 M->assembled = PETSC_FALSE; 2215 } 2216 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2217 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2218 ii = aij->i; 2219 jj = aij->j; 2220 aa = aij->a; 2221 for (i=0; i<m; i++) { 2222 row = rstart + i; 2223 nz = ii[i+1] - ii[i]; 2224 cwork = jj; jj += nz; 2225 vwork = aa; aa += nz; 2226 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2227 } 2228 2229 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2230 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2231 *newmat = M; 2232 2233 /* save submatrix used in processor for next request */ 2234 if (call == MAT_INITIAL_MATRIX) { 2235 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2236 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2237 } 2238 2239 PetscFunctionReturn(0); 2240 } 2241 2242 #undef __FUNCT__ 2243 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2244 /*@C 2245 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2246 (the default parallel PETSc format). For good matrix assembly performance 2247 the user should preallocate the matrix storage by setting the parameters 2248 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2249 performance can be increased by more than a factor of 50. 2250 2251 Collective on MPI_Comm 2252 2253 Input Parameters: 2254 + A - the matrix 2255 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2256 (same value is used for all local rows) 2257 . d_nnz - array containing the number of nonzeros in the various rows of the 2258 DIAGONAL portion of the local submatrix (possibly different for each row) 2259 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2260 The size of this array is equal to the number of local rows, i.e 'm'. 2261 You must leave room for the diagonal entry even if it is zero. 2262 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2263 submatrix (same value is used for all local rows). 2264 - o_nnz - array containing the number of nonzeros in the various rows of the 2265 OFF-DIAGONAL portion of the local submatrix (possibly different for 2266 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2267 structure. The size of this array is equal to the number 2268 of local rows, i.e 'm'. 
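
   A minimal calling sketch (the communicator, sizes, and per-row counts used
   here are illustrative assumptions, not values mandated by this routine):
.vb
     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve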

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor is the
   submatrix obtained by extracting the part corresponding to rows r1-r2
   and columns r1-r2 of the global matrix, where r1 is the first row that
   belongs to the processor and r2 is the last row belonging to this
   processor. This is a square mxm matrix. The remaining portion of the
   local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number of local
           rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning
   across processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the
   approximate storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor is the
   submatrix obtained by extracting the part corresponding to rows r1-r2
   and columns r1-r2 of the global matrix, where r1 is the first row that
   belongs to the processor and r2 is the last row belonging to this
   processor. This is a square mxm matrix. The remaining portion of the
   local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
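
   For the layout above, a sketch of the corresponding call on proc0 (the
   d_nnz/o_nnz values are the ones listed above; the communicator and error
   handling are assumptions made for illustration):
.vb
     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve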
2543 2544 Level: intermediate 2545 2546 .keywords: matrix, aij, compressed row, sparse, parallel 2547 2548 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2549 @*/ 2550 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2551 { 2552 int ierr,size; 2553 2554 PetscFunctionBegin; 2555 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2556 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2557 if (size > 1) { 2558 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2559 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2560 } else { 2561 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2562 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2563 } 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2569 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2570 { 2571 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2572 PetscFunctionBegin; 2573 *Ad = a->A; 2574 *Ao = a->B; 2575 *colmap = a->garray; 2576 PetscFunctionReturn(0); 2577 } 2578 2579 #undef __FUNCT__ 2580 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2581 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2582 { 2583 int ierr,i; 2584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2585 2586 PetscFunctionBegin; 2587 if (coloring->ctype == IS_COLORING_LOCAL) { 2588 ISColoringValue *allcolors,*colors; 2589 ISColoring ocoloring; 2590 2591 /* set coloring for diagonal portion */ 2592 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2593 2594 /* set coloring for off-diagonal portion */ 2595 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2596 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2597 for (i=0; i<a->B->n; i++) { 2598 colors[i] = allcolors[a->garray[i]]; 2599 } 2600 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2601 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2602 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2603 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2604 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2605 ISColoringValue *colors; 2606 int *larray; 2607 ISColoring ocoloring; 2608 2609 /* set coloring for diagonal portion */ 2610 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2611 for (i=0; i<a->A->n; i++) { 2612 larray[i] = i + a->cstart; 2613 } 2614 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2615 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2616 for (i=0; i<a->A->n; i++) { 2617 colors[i] = coloring->colors[larray[i]]; 2618 } 2619 ierr = PetscFree(larray);CHKERRQ(ierr); 2620 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2621 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2622 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2623 2624 /* set coloring for off-diagonal portion */ 2625 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2626 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2627 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2628 for (i=0; i<a->B->n; i++) { 2629 colors[i] = coloring->colors[larray[i]]; 2630 } 2631 ierr = PetscFree(larray);CHKERRQ(ierr); 2632 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2633 ierr 
= MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2634 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2635 } else { 2636 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2637 } 2638 2639 PetscFunctionReturn(0); 2640 } 2641 2642 #undef __FUNCT__ 2643 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2644 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2645 { 2646 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2647 int ierr; 2648 2649 PetscFunctionBegin; 2650 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2651 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2652 PetscFunctionReturn(0); 2653 } 2654 2655 #undef __FUNCT__ 2656 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2657 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2658 { 2659 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2660 int ierr; 2661 2662 PetscFunctionBegin; 2663 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2664 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2665 PetscFunctionReturn(0); 2666 } 2667 2668 #undef __FUNCT__ 2669 #define __FUNCT__ "MatMerge" 2670 /*@C 2671 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2672 matrices from each processor 2673 2674 Collective on MPI_Comm 2675 2676 Input Parameters: 2677 + comm - the communicators the parallel matrix will live on 2678 - inmat - the input sequential matrices 2679 2680 Output Parameter: 2681 . outmat - the parallel matrix generated 2682 2683 Level: advanced 2684 2685 Notes: The number of columns of the matrix in EACH of the seperate files 2686 MUST be the same. 2687 2688 @*/ 2689 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2690 { 2691 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2692 PetscScalar *values; 2693 PetscMap columnmap,rowmap; 2694 2695 PetscFunctionBegin; 2696 2697 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2698 2699 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2700 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2701 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2702 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2703 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2704 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2705 2706 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2707 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2708 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2709 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2710 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2711 2712 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2713 for (i=0;i<m;i++) { 2714 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2715 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2716 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2717 } 2718 /* This routine will ONLY return MPIAIJ type matrix */ 2719 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2720 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2721 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2722 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2723 2724 for (i=0;i<m;i++) { 2725 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2726 I = i + rstart; 2727 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2728 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2729 } 2730 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2731 ierr = 
MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2733 2734 PetscFunctionReturn(0); 2735 } 2736 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatFileSplit" 2739 int MatFileSplit(Mat A,char *outfile) 2740 { 2741 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2742 PetscViewer out; 2743 char *name; 2744 Mat B; 2745 PetscScalar *values; 2746 2747 PetscFunctionBegin; 2748 2749 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2750 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2751 /* Should this be the type of the diagonal block of A? */ 2752 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2753 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2754 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2755 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2756 for (i=0;i<m;i++) { 2757 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2758 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2759 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2760 } 2761 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 2764 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2765 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2766 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2767 sprintf(name,"%s.%d",outfile,rank); 2768 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2769 ierr = PetscFree(name); 2770 ierr = MatView(B,out);CHKERRQ(ierr); 2771 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2772 ierr = MatDestroy(B);CHKERRQ(ierr); 2773 PetscFunctionReturn(0); 2774 } 2775
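
/*
   Usage sketch for MatMerge() (illustrative only; 'seq' is assumed to be an
   already assembled sequential matrix owned by each process, all with the
   same number of columns):

     Mat seq,par;
     ... assemble seq on every process ...
     ierr = MatMerge(PETSC_COMM_WORLD,seq,&par);CHKERRQ(ierr);

   Note that MatMerge() destroys the input sequential matrix, so it must not
   be referenced (or destroyed again) after the call.
*/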