/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash table cost; without it, it is not scalable
  (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 191 int cstart = aij->cstart,cend = aij->cend,row,col; 192 PetscTruth roworiented = 
aij->roworiented; 193 194 /* Some Variables required in the macro */ 195 Mat A = aij->A; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 197 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 198 PetscScalar *aa = a->a; 199 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 200 Mat B = aij->B; 201 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 202 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 203 PetscScalar *ba = b->a; 204 205 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 206 int nonew = a->nonew,shift=0; 207 PetscScalar *ap; 208 209 PetscFunctionBegin; 210 for (i=0; i<m; i++) { 211 if (im[i] < 0) continue; 212 #if defined(PETSC_USE_BOPT_g) 213 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 214 #endif 215 if (im[i] >= rstart && im[i] < rend) { 216 row = im[i] - rstart; 217 for (j=0; j<n; j++) { 218 if (in[j] >= cstart && in[j] < cend){ 219 col = in[j] - cstart; 220 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 221 if (ignorezeroentries && value == 0.0) continue; 222 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 223 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 224 } else if (in[j] < 0) continue; 225 #if defined(PETSC_USE_BOPT_g) 226 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 227 #endif 228 else { 229 if (mat->was_assembled) { 230 if (!aij->colmap) { 231 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 232 } 233 #if defined (PETSC_USE_CTABLE) 234 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 235 col--; 236 #else 237 col = aij->colmap[in[j]] - 1; 238 #endif 239 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 240 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 241 col = in[j]; 242 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 243 B = aij->B; 244 b = (Mat_SeqAIJ*)B->data; 245 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 246 ba = b->a; 247 } 248 } else col = in[j]; 249 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 250 if (ignorezeroentries && value == 0.0) continue; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (!aij->donotstash) { 257 if (roworiented) { 258 if (ignorezeroentries && v[i*n] == 0.0) continue; 259 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 260 } else { 261 if (ignorezeroentries && v[i] == 0.0) continue; 262 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetValues_MPIAIJ" 272 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 275 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 276 int cstart = aij->cstart,cend = aij->cend,row,col; 277 278 PetscFunctionBegin; 279 for (i=0; i<m; i++) { 280 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 281 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 282 if (idxm[i] >= rstart && idxm[i] < rend) { 283 row = idxm[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative 
column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
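     (The MPI_PROD reduction on was_assembled below yields 0 as soon as any
     process has already disassembled its off-diagonal block; every process
     that is still assembled then calls DisAssemble_MPIAIJ() as well, so that
     all processes reassemble together.)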
*/ 375 /* 376 if nonzero structure of submatrix B cannot change then we know that 377 no processor disassembled thus we can skip this stuff 378 */ 379 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 380 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 381 if (mat->was_assembled && !other_disassembled) { 382 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 383 } 384 } 385 386 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 387 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 390 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 391 392 if (aij->rowvalues) { 393 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 394 aij->rowvalues = 0; 395 } 396 397 /* used by MatAXPY() */ 398 a->xtoy = 0; b->xtoy = 0; 399 a->XtoY = 0; b->XtoY = 0; 400 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 422 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 423 int *nprocs,j,idx,nsends,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 PetscTruth found; 432 433 PetscFunctionBegin; 434 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 435 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 436 437 /* first count number of contributors to each processor */ 438 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 439 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 440 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 441 for (i=0; i<N; i++) { 442 idx = rows[i]; 443 found = PETSC_FALSE; 444 for (j=0; j<size; j++) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 450 } 451 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 452 453 /* inform other processors of number of messages and max length*/ 454 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 455 456 /* post receives: */ 457 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 458 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 459 for (i=0; i<nrecvs; i++) { 460 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 461 } 462 463 /* do sends: 464 1) starts[i] gives the starting index in svalues for stuff going to 465 the ith processor 466 */ 467 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 468 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 469 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 470 starts[0] = 0; 471 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 472 for (i=0; i<N; i++) { 473 svalues[starts[owner[i]]++] 
= rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry;
     that would allocate new memory on every call, triggering many mallocs
     and slowing things down.
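     For example (an illustrative sketch, not taken from this routine; the
     names A, rows and one are hypothetical), a caller zeroing a set of rows
     while keeping a unit diagonal might do

        PetscScalar one = 1.0;
        ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);

     where rows is an IS of global row numbers; when the diagonal block is
     square this takes the fast path below that calls MatZeroRows() on l->B
     and l->A directly, with no per-entry MatSetValues().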
527 528 Contributed by: Mathew Knepley 529 */ 530 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 531 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 532 if (diag && (l->A->M == l->A->N)) { 533 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 534 } else if (diag) { 535 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 536 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 537 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 538 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 539 } 540 for (i = 0; i < slen; i++) { 541 row = lrows[i] + rstart; 542 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 543 } 544 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 } else { 547 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 548 } 549 ierr = ISDestroy(istmp);CHKERRQ(ierr); 550 ierr = PetscFree(lrows);CHKERRQ(ierr); 551 552 /* wait on sends */ 553 if (nsends) { 554 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 555 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 556 ierr = PetscFree(send_status);CHKERRQ(ierr); 557 } 558 ierr = PetscFree(send_waits);CHKERRQ(ierr); 559 ierr = PetscFree(svalues);CHKERRQ(ierr); 560 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatMult_MPIAIJ" 566 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 567 { 568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 569 int ierr,nt; 570 571 PetscFunctionBegin; 572 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 573 if (nt != A->n) { 574 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 575 } 576 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 577 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 578 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 579 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMultAdd_MPIAIJ" 585 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 588 int ierr; 589 590 PetscFunctionBegin; 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 600 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 /* do nondiagonal part */ 607 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 608 /* send it on its way */ 609 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 610 /* do local part */ 611 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 612 /* receive remote parts: note this assumes the values are not actually */ 613 /* inserted in yy until the next line, which is true for my implementation*/ 614 /* but is not perhaps always true. 
*/ 615 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 EXTERN_C_BEGIN 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 622 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 623 { 624 MPI_Comm comm; 625 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 626 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 627 IS Me,Notme; 628 int M,N,first,last,*notme,ntids,i, ierr; 629 630 PetscFunctionBegin; 631 632 /* Easy test: symmetric diagonal block */ 633 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 634 ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr); 635 if (!*f) PetscFunctionReturn(0); 636 ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr); 637 ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr); 638 if (ntids==1) PetscFunctionReturn(0); 639 640 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 641 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 642 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 643 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 644 for (i=0; i<first; i++) notme[i] = i; 645 for (i=last; i<M; i++) notme[i-last+first] = i; 646 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 647 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 648 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 649 Aoff = Aoffs[0]; 650 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 651 Boff = Boffs[0]; 652 ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr); 653 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 654 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 655 ierr = ISDestroy(Me); CHKERRQ(ierr); 656 ierr = ISDestroy(Notme); CHKERRQ(ierr); 657 658 PetscFunctionReturn(0); 659 } 660 EXTERN_C_END 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 664 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 665 { 666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 667 int ierr; 668 669 PetscFunctionBegin; 670 /* do nondiagonal part */ 671 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 672 /* send it on its way */ 673 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 674 /* do local part */ 675 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 676 /* receive remote parts: note this assumes the values are not actually */ 677 /* inserted in yy until the next line, which is true for my implementation*/ 678 /* but is not perhaps always true. 
*/ 679 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 /* 684 This only works correctly for square matrices where the subblock A->A is the 685 diagonal block 686 */ 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 689 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 690 { 691 int ierr; 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 694 PetscFunctionBegin; 695 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 696 if (a->rstart != a->cstart || a->rend != a->cend) { 697 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 698 } 699 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatScale_MPIAIJ" 705 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 706 { 707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 708 int ierr; 709 710 PetscFunctionBegin; 711 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 712 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatDestroy_MPIAIJ" 718 int MatDestroy_MPIAIJ(Mat mat) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 int ierr; 722 723 PetscFunctionBegin; 724 #if defined(PETSC_USE_LOG) 725 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 726 #endif 727 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 728 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 729 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 730 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 731 #if defined (PETSC_USE_CTABLE) 732 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 733 #else 734 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 735 #endif 736 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 737 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 738 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 739 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 740 ierr = PetscFree(aij);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MPIAIJ_Binary" 746 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 747 { 748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 749 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 750 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 751 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 752 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 753 PetscScalar *column_values; 754 755 PetscFunctionBegin; 756 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 757 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 758 nz = A->nz + B->nz; 759 if (rank == 0) { 760 header[0] = MAT_FILE_COOKIE; 761 header[1] = mat->M; 762 header[2] = mat->N; 763 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 764 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 765 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 766 /* get largest number of rows any processor has */ 767 rlen = mat->m; 768 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 769 for (i=1; i<size; i++) { 770 rlen = PetscMax(rlen,range[i+1] - range[i]); 771 } 772 } else { 773 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 774 rlen = mat->m; 775 } 776 777 /* load up the local row counts */ 778 ierr = 
PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 779 for (i=0; i<mat->m; i++) { 780 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 781 } 782 783 /* store the row lengths to the file */ 784 if (rank == 0) { 785 MPI_Status status; 786 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 787 for (i=1; i<size; i++) { 788 rlen = range[i+1] - range[i]; 789 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 790 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 791 } 792 } else { 793 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 794 } 795 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 796 797 /* load up the local column indices */ 798 nzmax = nz; /* )th processor needs space a largest processor needs */ 799 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 800 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 801 cnt = 0; 802 for (i=0; i<mat->m; i++) { 803 for (j=B->i[i]; j<B->i[i+1]; j++) { 804 if ( (col = garray[B->j[j]]) > cstart) break; 805 column_indices[cnt++] = col; 806 } 807 for (k=A->i[i]; k<A->i[i+1]; k++) { 808 column_indices[cnt++] = A->j[k] + cstart; 809 } 810 for (; j<B->i[i+1]; j++) { 811 column_indices[cnt++] = garray[B->j[j]]; 812 } 813 } 814 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 815 816 /* store the column indices to the file */ 817 if (rank == 0) { 818 MPI_Status status; 819 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 820 for (i=1; i<size; i++) { 821 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 823 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 824 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 825 } 826 } else { 827 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(column_indices);CHKERRQ(ierr); 831 832 /* load up the local column values */ 833 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 834 cnt = 0; 835 for (i=0; i<mat->m; i++) { 836 for (j=B->i[i]; j<B->i[i+1]; j++) { 837 if ( garray[B->j[j]] > cstart) break; 838 column_values[cnt++] = B->a[j]; 839 } 840 for (k=A->i[i]; k<A->i[i+1]; k++) { 841 column_values[cnt++] = A->a[k]; 842 } 843 for (; j<B->i[i+1]; j++) { 844 column_values[cnt++] = B->a[j]; 845 } 846 } 847 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 848 849 /* store the column values to the file */ 850 if (rank == 0) { 851 MPI_Status status; 852 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 853 for (i=1; i<size; i++) { 854 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 856 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 858 } 859 } else { 860 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 861 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(column_values);CHKERRQ(ierr); 864 
PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 869 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 872 int ierr,rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,isascii,flg,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (isascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 886 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 887 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 888 if (flg) { 889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 890 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 891 } else { 892 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 893 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 894 } 895 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 896 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 897 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 899 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 900 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } else if (format == PETSC_VIEWER_ASCII_INFO) { 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 905 PetscFunctionReturn(0); 906 } 907 } else if (isbinary) { 908 if (size == 1) { 909 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 910 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 911 } else { 912 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } else if (isdraw) { 916 PetscDraw draw; 917 PetscTruth isnull; 918 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 919 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 920 } 921 922 if (size == 1) { 923 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 924 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 925 } else { 926 /* assemble the entire matrix onto first processor. 
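     Everything is gathered onto process 0, so this path is only sensible for
     matrices small enough to fit in the memory of a single process.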
*/ 927 Mat A; 928 Mat_SeqAIJ *Aloc; 929 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 930 PetscScalar *a; 931 932 if (!rank) { 933 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 934 } else { 935 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 936 } 937 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 938 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 939 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 940 PetscLogObjectParent(mat,A); 941 942 /* copy over the A part */ 943 Aloc = (Mat_SeqAIJ*)aij->A->data; 944 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 945 row = aij->rstart; 946 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 947 for (i=0; i<m; i++) { 948 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 949 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 950 } 951 aj = Aloc->j; 952 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 953 954 /* copy over the B part */ 955 Aloc = (Mat_SeqAIJ*)aij->B->data; 956 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 957 row = aij->rstart; 958 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 959 ct = cols; 960 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 961 for (i=0; i<m; i++) { 962 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 963 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 964 } 965 ierr = PetscFree(ct);CHKERRQ(ierr); 966 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968 /* 969 Everyone has to call to draw the matrix since the graphics waits are 970 synchronized across all processors that share the PetscDraw object 971 */ 972 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 973 if (!rank) { 974 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 975 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 976 } 977 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 978 ierr = MatDestroy(A);CHKERRQ(ierr); 979 } 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatView_MPIAIJ" 985 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 986 { 987 int ierr; 988 PetscTruth isascii,isdraw,issocket,isbinary; 989 990 PetscFunctionBegin; 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 992 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 993 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 994 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 995 if (isascii || isdraw || isbinary || issocket) { 996 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 997 } else { 998 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatRelax_MPIAIJ" 1007 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1008 { 1009 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1010 int ierr; 1011 Vec bb1; 1012 PetscScalar mone=-1.0; 1013 1014 PetscFunctionBegin; 1015 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1016 1017 ierr = 
VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1018 1019 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1020 if (flag & SOR_ZERO_INITIAL_GUESS) { 1021 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1022 its--; 1023 } 1024 1025 while (its--) { 1026 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1027 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1028 1029 /* update rhs: bb1 = bb - B*x */ 1030 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1031 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1032 1033 /* local sweep */ 1034 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1035 CHKERRQ(ierr); 1036 } 1037 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1038 if (flag & SOR_ZERO_INITIAL_GUESS) { 1039 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1040 its--; 1041 } 1042 while (its--) { 1043 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1044 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1045 1046 /* update rhs: bb1 = bb - B*x */ 1047 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1048 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1049 1050 /* local sweep */ 1051 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1052 CHKERRQ(ierr); 1053 } 1054 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1055 if (flag & SOR_ZERO_INITIAL_GUESS) { 1056 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1057 its--; 1058 } 1059 while (its--) { 1060 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1061 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1062 1063 /* update rhs: bb1 = bb - B*x */ 1064 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1065 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1066 1067 /* local sweep */ 1068 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1069 CHKERRQ(ierr); 1070 } 1071 } else { 1072 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1073 } 1074 1075 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1082 { 1083 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1084 Mat A = mat->A,B = mat->B; 1085 int ierr; 1086 PetscReal isend[5],irecv[5]; 1087 1088 PetscFunctionBegin; 1089 info->block_size = 1.0; 1090 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1091 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1092 isend[3] = info->memory; isend[4] = info->mallocs; 1093 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1094 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1095 isend[3] += info->memory; isend[4] += info->mallocs; 1096 if (flag == MAT_LOCAL) { 1097 info->nz_used = isend[0]; 1098 info->nz_allocated = isend[1]; 1099 info->nz_unneeded = isend[2]; 1100 info->memory = isend[3]; 1101 info->mallocs = isend[4]; 1102 } else if (flag == MAT_GLOBAL_MAX) { 1103 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1104 info->nz_used 
= irecv[0]; 1105 info->nz_allocated = irecv[1]; 1106 info->nz_unneeded = irecv[2]; 1107 info->memory = irecv[3]; 1108 info->mallocs = irecv[4]; 1109 } else if (flag == MAT_GLOBAL_SUM) { 1110 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1111 info->nz_used = irecv[0]; 1112 info->nz_allocated = irecv[1]; 1113 info->nz_unneeded = irecv[2]; 1114 info->memory = irecv[3]; 1115 info->mallocs = irecv[4]; 1116 } 1117 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1118 info->fill_ratio_needed = 0; 1119 info->factor_mallocs = 0; 1120 info->rows_global = (double)matin->M; 1121 info->columns_global = (double)matin->N; 1122 info->rows_local = (double)matin->m; 1123 info->columns_local = (double)matin->N; 1124 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatSetOption_MPIAIJ" 1130 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 int ierr; 1134 1135 PetscFunctionBegin; 1136 switch (op) { 1137 case MAT_NO_NEW_NONZERO_LOCATIONS: 1138 case MAT_YES_NEW_NONZERO_LOCATIONS: 1139 case MAT_COLUMNS_UNSORTED: 1140 case MAT_COLUMNS_SORTED: 1141 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1142 case MAT_KEEP_ZEROED_ROWS: 1143 case MAT_NEW_NONZERO_LOCATION_ERR: 1144 case MAT_USE_INODES: 1145 case MAT_DO_NOT_USE_INODES: 1146 case MAT_IGNORE_ZERO_ENTRIES: 1147 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1148 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1149 break; 1150 case MAT_ROW_ORIENTED: 1151 a->roworiented = PETSC_TRUE; 1152 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1153 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1154 break; 1155 case MAT_ROWS_SORTED: 1156 case MAT_ROWS_UNSORTED: 1157 case MAT_YES_NEW_DIAGONALS: 1158 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1159 break; 1160 case MAT_COLUMN_ORIENTED: 1161 a->roworiented = PETSC_FALSE; 1162 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1163 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1164 break; 1165 case MAT_IGNORE_OFF_PROC_ENTRIES: 1166 a->donotstash = PETSC_TRUE; 1167 break; 1168 case MAT_NO_NEW_DIAGONALS: 1169 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1170 case MAT_SYMMETRIC: 1171 case MAT_STRUCTURALLY_SYMMETRIC: 1172 case MAT_HERMITIAN: 1173 case MAT_SYMMETRY_ETERNAL: 1174 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1175 break; 1176 case MAT_NOT_SYMMETRIC: 1177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1178 case MAT_NOT_HERMITIAN: 1179 case MAT_NOT_SYMMETRY_ETERNAL: 1180 break; 1181 default: 1182 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatGetRow_MPIAIJ" 1189 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1190 { 1191 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1192 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1193 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1194 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1195 int *cmap,*idx_p; 1196 1197 PetscFunctionBegin; 1198 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1199 mat->getrowactive = PETSC_TRUE; 1200 1201 if (!mat->rowvalues && (idx || v)) { 1202 /* 1203 allocate enough space to hold information from the longest row. 
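       (These work arrays are cached on the matrix and reused by subsequent
       MatGetRow() calls; they are freed again in MatAssemblyEnd_MPIAIJ() and
       MatDestroy_MPIAIJ().)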
1204 */ 1205 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1206 int max = 1,tmp; 1207 for (i=0; i<matin->m; i++) { 1208 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1209 if (max < tmp) { max = tmp; } 1210 } 1211 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1212 mat->rowindices = (int*)(mat->rowvalues + max); 1213 } 1214 1215 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1216 lrow = row - rstart; 1217 1218 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1219 if (!v) {pvA = 0; pvB = 0;} 1220 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1221 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1222 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1223 nztot = nzA + nzB; 1224 1225 cmap = mat->garray; 1226 if (v || idx) { 1227 if (nztot) { 1228 /* Sort by increasing column numbers, assuming A and B already sorted */ 1229 int imark = -1; 1230 if (v) { 1231 *v = v_p = mat->rowvalues; 1232 for (i=0; i<nzB; i++) { 1233 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1234 else break; 1235 } 1236 imark = i; 1237 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1238 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1239 } 1240 if (idx) { 1241 *idx = idx_p = mat->rowindices; 1242 if (imark > -1) { 1243 for (i=0; i<imark; i++) { 1244 idx_p[i] = cmap[cworkB[i]]; 1245 } 1246 } else { 1247 for (i=0; i<nzB; i++) { 1248 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1249 else break; 1250 } 1251 imark = i; 1252 } 1253 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1254 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1255 } 1256 } else { 1257 if (idx) *idx = 0; 1258 if (v) *v = 0; 1259 } 1260 } 1261 *nz = nztot; 1262 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1263 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1269 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1270 { 1271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1272 1273 PetscFunctionBegin; 1274 if (aij->getrowactive == PETSC_FALSE) { 1275 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1276 } 1277 aij->getrowactive = PETSC_FALSE; 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatNorm_MPIAIJ" 1283 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1287 int ierr,i,j,cstart = aij->cstart; 1288 PetscReal sum = 0.0; 1289 PetscScalar *v; 1290 1291 PetscFunctionBegin; 1292 if (aij->size == 1) { 1293 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1294 } else { 1295 if (type == NORM_FROBENIUS) { 1296 v = amat->a; 1297 for (i=0; i<amat->nz; i++) { 1298 #if defined(PETSC_USE_COMPLEX) 1299 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1300 #else 1301 sum += (*v)*(*v); v++; 1302 #endif 1303 } 1304 v = bmat->a; 1305 for (i=0; i<bmat->nz; i++) { 1306 #if defined(PETSC_USE_COMPLEX) 1307 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1308 #else 1309 sum += (*v)*(*v); v++; 1310 #endif 1311 } 1312 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1313 *norm = sqrt(*norm); 1314 } else if (type == NORM_1) { /* max column norm */ 1315 PetscReal *tmp,*tmp2; 1316 int 
*jj,*garray = aij->garray; 1317 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1318 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1319 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1320 *norm = 0.0; 1321 v = amat->a; jj = amat->j; 1322 for (j=0; j<amat->nz; j++) { 1323 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1324 } 1325 v = bmat->a; jj = bmat->j; 1326 for (j=0; j<bmat->nz; j++) { 1327 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1328 } 1329 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1330 for (j=0; j<mat->N; j++) { 1331 if (tmp2[j] > *norm) *norm = tmp2[j]; 1332 } 1333 ierr = PetscFree(tmp);CHKERRQ(ierr); 1334 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1335 } else if (type == NORM_INFINITY) { /* max row norm */ 1336 PetscReal ntemp = 0.0; 1337 for (j=0; j<aij->A->m; j++) { 1338 v = amat->a + amat->i[j]; 1339 sum = 0.0; 1340 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1341 sum += PetscAbsScalar(*v); v++; 1342 } 1343 v = bmat->a + bmat->i[j]; 1344 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1345 sum += PetscAbsScalar(*v); v++; 1346 } 1347 if (sum > ntemp) ntemp = sum; 1348 } 1349 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1350 } else { 1351 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1352 } 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatTranspose_MPIAIJ" 1359 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1360 { 1361 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1362 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1363 int ierr; 1364 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1365 Mat B; 1366 PetscScalar *array; 1367 1368 PetscFunctionBegin; 1369 if (!matout && M != N) { 1370 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1371 } 1372 1373 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1374 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1375 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1376 1377 /* copy over the A part */ 1378 Aloc = (Mat_SeqAIJ*)a->A->data; 1379 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1380 row = a->rstart; 1381 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1382 for (i=0; i<m; i++) { 1383 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1384 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1385 } 1386 aj = Aloc->j; 1387 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1388 1389 /* copy over the B part */ 1390 Aloc = (Mat_SeqAIJ*)a->B->data; 1391 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1392 row = a->rstart; 1393 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1394 ct = cols; 1395 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1396 for (i=0; i<m; i++) { 1397 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1398 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1399 } 1400 ierr = PetscFree(ct);CHKERRQ(ierr); 1401 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1402 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1403 if (matout) { 1404 *matout = B; 1405 } else { 1406 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1413 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1414 { 1415 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1416 Mat a = aij->A,b = aij->B; 1417 int 
ierr,s1,s2,s3; 1418 1419 PetscFunctionBegin; 1420 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1421 if (rr) { 1422 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1423 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1424 /* Overlap communication with computation. */ 1425 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1426 } 1427 if (ll) { 1428 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1429 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1430 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1431 } 1432 /* scale the diagonal block */ 1433 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1434 1435 if (rr) { 1436 /* Do a scatter end and then right scale the off-diagonal block */ 1437 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1438 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1439 } 1440 1441 PetscFunctionReturn(0); 1442 } 1443 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1447 int MatPrintHelp_MPIAIJ(Mat A) 1448 { 1449 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1450 int ierr; 1451 1452 PetscFunctionBegin; 1453 if (!a->rank) { 1454 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1461 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1462 { 1463 PetscFunctionBegin; 1464 *bs = 1; 1465 PetscFunctionReturn(0); 1466 } 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1469 int MatSetUnfactored_MPIAIJ(Mat A) 1470 { 1471 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1472 int ierr; 1473 1474 PetscFunctionBegin; 1475 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatEqual_MPIAIJ" 1481 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1482 { 1483 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1484 Mat a,b,c,d; 1485 PetscTruth flg; 1486 int ierr; 1487 1488 PetscFunctionBegin; 1489 a = matA->A; b = matA->B; 1490 c = matB->A; d = matB->B; 1491 1492 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1493 if (flg == PETSC_TRUE) { 1494 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1495 } 1496 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatCopy_MPIAIJ" 1502 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1503 { 1504 int ierr; 1505 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1506 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1507 1508 PetscFunctionBegin; 1509 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1510 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1511 /* because of the column compression in the off-processor part of the matrix a->B, 1512 the number of columns in a->B and b->B may be different, hence we cannot call 1513 the MatCopy() directly on the two parts. 
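       (The off-processor block a->B is stored with compressed column indices:
        only global columns that actually contain a nonzero on this process get
        a local column, and a->garray maps those local indices back to global
        column numbers, so the two B blocks need not line up column-for-column.)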
If need be, we can provide a more 1514 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1515 then copying the submatrices */ 1516 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1517 } else { 1518 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1519 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1526 int MatSetUpPreallocation_MPIAIJ(Mat A) 1527 { 1528 int ierr; 1529 1530 PetscFunctionBegin; 1531 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #include "petscblaslapack.h" 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatAXPY_MPIAIJ" 1538 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1539 { 1540 int ierr,one=1,i; 1541 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1542 Mat_SeqAIJ *x,*y; 1543 1544 PetscFunctionBegin; 1545 if (str == SAME_NONZERO_PATTERN) { 1546 x = (Mat_SeqAIJ *)xx->A->data; 1547 y = (Mat_SeqAIJ *)yy->A->data; 1548 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1549 x = (Mat_SeqAIJ *)xx->B->data; 1550 y = (Mat_SeqAIJ *)yy->B->data; 1551 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1552 } else if (str == SUBSET_NONZERO_PATTERN) { 1553 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1554 1555 x = (Mat_SeqAIJ *)xx->B->data; 1556 y = (Mat_SeqAIJ *)yy->B->data; 1557 if (y->xtoy && y->XtoY != xx->B) { 1558 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1559 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1560 } 1561 if (!y->xtoy) { /* get xtoy */ 1562 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1563 y->XtoY = xx->B; 1564 } 1565 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1566 } else { 1567 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1568 } 1569 PetscFunctionReturn(0); 1570 } 1571 1572 /* -------------------------------------------------------------------*/ 1573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1574 MatGetRow_MPIAIJ, 1575 MatRestoreRow_MPIAIJ, 1576 MatMult_MPIAIJ, 1577 /* 4*/ MatMultAdd_MPIAIJ, 1578 MatMultTranspose_MPIAIJ, 1579 MatMultTransposeAdd_MPIAIJ, 1580 0, 1581 0, 1582 0, 1583 /*10*/ 0, 1584 0, 1585 0, 1586 MatRelax_MPIAIJ, 1587 MatTranspose_MPIAIJ, 1588 /*15*/ MatGetInfo_MPIAIJ, 1589 MatEqual_MPIAIJ, 1590 MatGetDiagonal_MPIAIJ, 1591 MatDiagonalScale_MPIAIJ, 1592 MatNorm_MPIAIJ, 1593 /*20*/ MatAssemblyBegin_MPIAIJ, 1594 MatAssemblyEnd_MPIAIJ, 1595 0, 1596 MatSetOption_MPIAIJ, 1597 MatZeroEntries_MPIAIJ, 1598 /*25*/ MatZeroRows_MPIAIJ, 1599 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1600 MatLUFactorSymbolic_MPIAIJ_TFS, 1601 #else 1602 0, 1603 #endif 1604 0, 1605 0, 1606 0, 1607 /*30*/ MatSetUpPreallocation_MPIAIJ, 1608 0, 1609 0, 1610 0, 1611 0, 1612 /*35*/ MatDuplicate_MPIAIJ, 1613 0, 1614 0, 1615 0, 1616 0, 1617 /*40*/ MatAXPY_MPIAIJ, 1618 MatGetSubMatrices_MPIAIJ, 1619 MatIncreaseOverlap_MPIAIJ, 1620 MatGetValues_MPIAIJ, 1621 MatCopy_MPIAIJ, 1622 /*45*/ MatPrintHelp_MPIAIJ, 1623 MatScale_MPIAIJ, 1624 0, 1625 0, 1626 0, 1627 /*50*/ MatGetBlockSize_MPIAIJ, 1628 0, 1629 0, 1630 0, 1631 0, 1632 /*55*/ MatFDColoringCreate_MPIAIJ, 1633 0, 1634 MatSetUnfactored_MPIAIJ, 1635 0, 1636 0, 1637 /*60*/ MatGetSubMatrix_MPIAIJ, 1638 MatDestroy_MPIAIJ, 1639 MatView_MPIAIJ, 1640 MatGetPetscMaps_Petsc, 1641 0, 1642 /*65*/ 0, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*70*/ 0, 1648 0, 1649 
MatSetColoring_MPIAIJ, 1650 MatSetValuesAdic_MPIAIJ, 1651 MatSetValuesAdifor_MPIAIJ, 1652 /*75*/ 0, 1653 0, 1654 0, 1655 0, 1656 0, 1657 /*80*/ 0, 1658 0, 1659 0, 1660 0, 1661 /*85*/ MatLoad_MPIAIJ}; 1662 1663 /* ----------------------------------------------------------------------------------------*/ 1664 1665 EXTERN_C_BEGIN 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1668 int MatStoreValues_MPIAIJ(Mat mat) 1669 { 1670 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1671 int ierr; 1672 1673 PetscFunctionBegin; 1674 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1675 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1676 PetscFunctionReturn(0); 1677 } 1678 EXTERN_C_END 1679 1680 EXTERN_C_BEGIN 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1683 int MatRetrieveValues_MPIAIJ(Mat mat) 1684 { 1685 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1686 int ierr; 1687 1688 PetscFunctionBegin; 1689 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1690 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 EXTERN_C_END 1694 1695 #include "petscpc.h" 1696 EXTERN_C_BEGIN 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1699 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1700 { 1701 Mat_MPIAIJ *b; 1702 int ierr,i; 1703 1704 PetscFunctionBegin; 1705 B->preallocated = PETSC_TRUE; 1706 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1707 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1708 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1709 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1710 if (d_nnz) { 1711 for (i=0; i<B->m; i++) { 1712 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1713 } 1714 } 1715 if (o_nnz) { 1716 for (i=0; i<B->m; i++) { 1717 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1718 } 1719 } 1720 b = (Mat_MPIAIJ*)B->data; 1721 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1722 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1723 1724 PetscFunctionReturn(0); 1725 } 1726 EXTERN_C_END 1727 1728 /*MC 1729 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1730 1731 Options Database Keys: 1732 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1733 1734 Level: beginner 1735 1736 .seealso: MatCreateMPIAIJ 1737 M*/ 1738 1739 EXTERN_C_BEGIN 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatCreate_MPIAIJ" 1742 int MatCreate_MPIAIJ(Mat B) 1743 { 1744 Mat_MPIAIJ *b; 1745 int ierr,i,size; 1746 1747 PetscFunctionBegin; 1748 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1749 1750 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1751 B->data = (void*)b; 1752 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1753 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1754 B->factor = 0; 1755 B->assembled = PETSC_FALSE; 1756 B->mapping = 0; 1757 1758 B->insertmode = NOT_SET_VALUES; 1759 b->size = size; 1760 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1761 1762 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1763 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1764 1765 /* the information in the maps duplicates the information computed below, eventually 1766 we should remove the duplicate information that is not contained in the maps */ 1767 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1768 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1769 1770 /* build local table of row and column ownerships */ 1771 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1772 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1773 b->cowners = b->rowners + b->size + 2; 1774 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1775 b->rowners[0] = 0; 1776 for (i=2; i<=b->size; i++) { 1777 b->rowners[i] += b->rowners[i-1]; 1778 } 1779 b->rstart = b->rowners[b->rank]; 1780 b->rend = b->rowners[b->rank+1]; 1781 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1782 b->cowners[0] = 0; 1783 for (i=2; i<=b->size; i++) { 1784 b->cowners[i] += b->cowners[i-1]; 1785 } 1786 b->cstart = b->cowners[b->rank]; 1787 b->cend = b->cowners[b->rank+1]; 1788 1789 /* build cache for off array entries formed */ 1790 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1791 b->donotstash = PETSC_FALSE; 1792 b->colmap = 0; 1793 b->garray = 0; 1794 b->roworiented = PETSC_TRUE; 1795 1796 /* stuff used for matrix vector multiply */ 1797 b->lvec = PETSC_NULL; 1798 b->Mvctx = PETSC_NULL; 1799 1800 /* stuff for MatGetRow() */ 1801 b->rowindices = 0; 1802 b->rowvalues = 0; 1803 b->getrowactive = PETSC_FALSE; 1804 1805 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 1806 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1807 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1808 PetscLogObjectParent(B,b->A); 1809 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1810 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1811 PetscLogObjectParent(B,b->B); 1812 1813 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1814 "MatStoreValues_MPIAIJ", 1815 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1816 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1817 "MatRetrieveValues_MPIAIJ", 1818 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1820 "MatGetDiagonalBlock_MPIAIJ", 1821 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1822 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1823 "MatIsTranspose_MPIAIJ", 1824 MatIsTranspose_MPIAIJ); CHKERRQ(ierr); 1825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1826 "MatMPIAIJSetPreallocation_MPIAIJ", 1827 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1829 "MatDiagonalScaleLocal_MPIAIJ", 1830 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 EXTERN_C_END 1834 1835 /*MC 1836 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1837 1838 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1839 and MATMPIAIJ otherwise. As a result, for single process communicators, 1840 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1841 for communicators controlling multiple processes. It is recommended that you call both of 1842 the above preallocation routines for simplicity. 1843 1844 Options Database Keys: 1845 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1846 1847 Level: beginner 1848 1849 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1850 M*/ 1851 1852 EXTERN_C_BEGIN 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatCreate_AIJ" 1855 int MatCreate_AIJ(Mat A) { 1856 int ierr,size; 1857 1858 PetscFunctionBegin; 1859 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1860 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1861 if (size == 1) { 1862 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1863 } else { 1864 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 EXTERN_C_END 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1872 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1873 { 1874 Mat mat; 1875 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1876 int ierr; 1877 1878 PetscFunctionBegin; 1879 *newmat = 0; 1880 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1881 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1882 a = (Mat_MPIAIJ*)mat->data; 1883 1884 mat->factor = matin->factor; 1885 mat->assembled = PETSC_TRUE; 1886 mat->insertmode = NOT_SET_VALUES; 1887 mat->preallocated = PETSC_TRUE; 1888 1889 a->rstart = oldmat->rstart; 1890 a->rend = oldmat->rend; 1891 a->cstart = oldmat->cstart; 1892 a->cend = oldmat->cend; 1893 a->size = oldmat->size; 1894 a->rank = oldmat->rank; 1895 a->donotstash = oldmat->donotstash; 1896 a->roworiented = oldmat->roworiented; 1897 a->rowindices = 0; 1898 a->rowvalues = 0; 1899 a->getrowactive = PETSC_FALSE; 1900 1901 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1902 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1903 if (oldmat->colmap) { 1904 #if defined (PETSC_USE_CTABLE) 1905 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1906 #else 1907 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1908 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1909 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1910 #endif 1911 } else a->colmap = 0; 1912 if (oldmat->garray) { 1913 int len; 1914 len = oldmat->B->n; 1915 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1916 PetscLogObjectMemory(mat,len*sizeof(int)); 1917 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1918 } else a->garray = 0; 1919 1920 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1921 PetscLogObjectParent(mat,a->lvec); 1922 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1923 PetscLogObjectParent(mat,a->Mvctx); 1924 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1925 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1926 PetscLogObjectParent(mat,a->A); 1927 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1928 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1929 PetscLogObjectParent(mat,a->B); 1930 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1931 *newmat = mat; 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #include "petscsys.h" 1936 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatLoad_MPIAIJ" 1939 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1940 { 1941 Mat A; 1942 PetscScalar *vals,*svals; 1943 MPI_Comm comm = ((PetscObject)viewer)->comm; 1944 MPI_Status status; 1945 int i,nz,ierr,j,rstart,rend,fd; 1946 int header[4],rank,size,*rowlengths = 
0,M,N,m,*rowners,maxnz,*cols; 1947 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1948 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1949 1950 PetscFunctionBegin; 1951 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1952 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1953 if (!rank) { 1954 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1955 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1956 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1957 if (header[3] < 0) { 1958 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1959 } 1960 } 1961 1962 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1963 M = header[1]; N = header[2]; 1964 /* determine ownership of all rows */ 1965 m = M/size + ((M % size) > rank); 1966 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1967 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1968 rowners[0] = 0; 1969 for (i=2; i<=size; i++) { 1970 rowners[i] += rowners[i-1]; 1971 } 1972 rstart = rowners[rank]; 1973 rend = rowners[rank+1]; 1974 1975 /* distribute row lengths to all processors */ 1976 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1977 offlens = ourlens + (rend-rstart); 1978 if (!rank) { 1979 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1980 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1981 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1982 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1983 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1984 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1985 } else { 1986 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1987 } 1988 1989 if (!rank) { 1990 /* calculate the number of nonzeros on each processor */ 1991 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1992 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1993 for (i=0; i<size; i++) { 1994 for (j=rowners[i]; j< rowners[i+1]; j++) { 1995 procsnz[i] += rowlengths[j]; 1996 } 1997 } 1998 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1999 2000 /* determine max buffer needed and allocate it */ 2001 maxnz = 0; 2002 for (i=0; i<size; i++) { 2003 maxnz = PetscMax(maxnz,procsnz[i]); 2004 } 2005 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2006 2007 /* read in my part of the matrix column indices */ 2008 nz = procsnz[0]; 2009 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2010 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2011 2012 /* read in every one elses and ship off */ 2013 for (i=1; i<size; i++) { 2014 nz = procsnz[i]; 2015 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2016 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2017 } 2018 ierr = PetscFree(cols);CHKERRQ(ierr); 2019 } else { 2020 /* determine buffer space needed for message */ 2021 nz = 0; 2022 for (i=0; i<m; i++) { 2023 nz += ourlens[i]; 2024 } 2025 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2026 2027 /* receive message of column indices*/ 2028 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2029 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2030 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2031 } 2032 2033 /* determine column 
ownership if matrix is not square */ 2034 if (N != M) { 2035 n = N/size + ((N % size) > rank); 2036 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2037 cstart = cend - n; 2038 } else { 2039 cstart = rstart; 2040 cend = rend; 2041 n = cend - cstart; 2042 } 2043 2044 /* loop over local rows, determining number of off diagonal entries */ 2045 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2046 jj = 0; 2047 for (i=0; i<m; i++) { 2048 for (j=0; j<ourlens[i]; j++) { 2049 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2050 jj++; 2051 } 2052 } 2053 2054 /* create our matrix */ 2055 for (i=0; i<m; i++) { 2056 ourlens[i] -= offlens[i]; 2057 } 2058 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2059 ierr = MatSetType(A,type);CHKERRQ(ierr); 2060 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2061 2062 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2063 for (i=0; i<m; i++) { 2064 ourlens[i] += offlens[i]; 2065 } 2066 2067 if (!rank) { 2068 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2069 2070 /* read in my part of the matrix numerical values */ 2071 nz = procsnz[0]; 2072 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2073 2074 /* insert into matrix */ 2075 jj = rstart; 2076 smycols = mycols; 2077 svals = vals; 2078 for (i=0; i<m; i++) { 2079 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2080 smycols += ourlens[i]; 2081 svals += ourlens[i]; 2082 jj++; 2083 } 2084 2085 /* read in other processors and ship out */ 2086 for (i=1; i<size; i++) { 2087 nz = procsnz[i]; 2088 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2089 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2090 } 2091 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2092 } else { 2093 /* receive numeric values */ 2094 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2095 2096 /* receive message of values*/ 2097 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2098 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2099 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2100 2101 /* insert into matrix */ 2102 jj = rstart; 2103 smycols = mycols; 2104 svals = vals; 2105 for (i=0; i<m; i++) { 2106 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2107 smycols += ourlens[i]; 2108 svals += ourlens[i]; 2109 jj++; 2110 } 2111 } 2112 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2113 ierr = PetscFree(vals);CHKERRQ(ierr); 2114 ierr = PetscFree(mycols);CHKERRQ(ierr); 2115 ierr = PetscFree(rowners);CHKERRQ(ierr); 2116 2117 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2118 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2119 *newmat = A; 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2125 /* 2126 Not great since it makes two copies of the submatrix, first an SeqAIJ 2127 in local and then by concatenating the local matrices the end result. 
2128 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2129 */ 2130 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2131 { 2132 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2133 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2134 Mat *local,M,Mreuse; 2135 PetscScalar *vwork,*aa; 2136 MPI_Comm comm = mat->comm; 2137 Mat_SeqAIJ *aij; 2138 2139 2140 PetscFunctionBegin; 2141 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2142 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2143 2144 if (call == MAT_REUSE_MATRIX) { 2145 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2146 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2147 local = &Mreuse; 2148 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2149 } else { 2150 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2151 Mreuse = *local; 2152 ierr = PetscFree(local);CHKERRQ(ierr); 2153 } 2154 2155 /* 2156 m - number of local rows 2157 n - number of columns (same on all processors) 2158 rstart - first row in new global matrix generated 2159 */ 2160 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2161 if (call == MAT_INITIAL_MATRIX) { 2162 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2163 ii = aij->i; 2164 jj = aij->j; 2165 2166 /* 2167 Determine the number of non-zeros in the diagonal and off-diagonal 2168 portions of the matrix in order to do correct preallocation 2169 */ 2170 2171 /* first get start and end of "diagonal" columns */ 2172 if (csize == PETSC_DECIDE) { 2173 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2174 if (mglobal == n) { /* square matrix */ 2175 nlocal = m; 2176 } else { 2177 nlocal = n/size + ((n % size) > rank); 2178 } 2179 } else { 2180 nlocal = csize; 2181 } 2182 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2183 rstart = rend - nlocal; 2184 if (rank == size - 1 && rend != n) { 2185 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2186 } 2187 2188 /* next, compute all the lengths */ 2189 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2190 olens = dlens + m; 2191 for (i=0; i<m; i++) { 2192 jend = ii[i+1] - ii[i]; 2193 olen = 0; 2194 dlen = 0; 2195 for (j=0; j<jend; j++) { 2196 if (*jj < rstart || *jj >= rend) olen++; 2197 else dlen++; 2198 jj++; 2199 } 2200 olens[i] = olen; 2201 dlens[i] = dlen; 2202 } 2203 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2204 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2205 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2206 ierr = PetscFree(dlens);CHKERRQ(ierr); 2207 } else { 2208 int ml,nl; 2209 2210 M = *newmat; 2211 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2212 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2213 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2214 /* 2215 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2216 rather than the slower MatSetValues(). 
2217 */ 2218 M->was_assembled = PETSC_TRUE; 2219 M->assembled = PETSC_FALSE; 2220 } 2221 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2222 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2223 ii = aij->i; 2224 jj = aij->j; 2225 aa = aij->a; 2226 for (i=0; i<m; i++) { 2227 row = rstart + i; 2228 nz = ii[i+1] - ii[i]; 2229 cwork = jj; jj += nz; 2230 vwork = aa; aa += nz; 2231 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2232 } 2233 2234 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2235 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2236 *newmat = M; 2237 2238 /* save submatrix used in processor for next request */ 2239 if (call == MAT_INITIAL_MATRIX) { 2240 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2241 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2242 } 2243 2244 PetscFunctionReturn(0); 2245 } 2246 2247 #undef __FUNCT__ 2248 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2249 /*@C 2250 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2251 (the default parallel PETSc format). For good matrix assembly performance 2252 the user should preallocate the matrix storage by setting the parameters 2253 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2254 performance can be increased by more than a factor of 50. 2255 2256 Collective on MPI_Comm 2257 2258 Input Parameters: 2259 + A - the matrix 2260 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2261 (same value is used for all local rows) 2262 . d_nnz - array containing the number of nonzeros in the various rows of the 2263 DIAGONAL portion of the local submatrix (possibly different for each row) 2264 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2265 The size of this array is equal to the number of local rows, i.e., 'm'. 2266 You must leave room for the diagonal entry even if it is zero. 2267 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2268 submatrix (same value is used for all local rows). 2269 - o_nnz - array containing the number of nonzeros in the various rows of the 2270 OFF-DIAGONAL portion of the local submatrix (possibly different for 2271 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2272 structure. The size of this array is equal to the number 2273 of local rows, i.e., 'm'. 2274 2275 The AIJ format (also called the Yale sparse matrix format or 2276 compressed row storage) is fully compatible with standard Fortran 77 2277 storage. That is, the stored row and column indices can begin at 2278 either one (as in Fortran) or zero. See the users manual for details. 2279 2280 The user MUST specify either the local or global matrix dimensions 2281 (possibly both). 2282 2283 The parallel matrix is partitioned such that the first m0 rows belong to 2284 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2285 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2286 2287 The DIAGONAL portion of the local submatrix of a processor can be defined 2288 as the submatrix which is obtained by extracting the part corresponding 2289 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2290 first row that belongs to the processor, and r2 is the last row belonging 2291 to this processor. This is a square mxm matrix. The remaining portion 2292 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 
2293 2294 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2295 2296 By default, this format uses inodes (identical nodes) when possible. 2297 We search for consecutive rows with the same nonzero structure, thereby 2298 reusing matrix information to achieve increased efficiency. 2299 2300 Options Database Keys: 2301 + -mat_aij_no_inode - Do not use inodes 2302 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2303 - -mat_aij_oneindex - Internally use indexing starting at 1 2304 rather than 0. Note that when calling MatSetValues(), 2305 the user still MUST index entries starting at 0! 2306 2307 Example usage: 2308 2309 Consider the following 8x8 matrix with 34 non-zero values that is 2310 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2311 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2312 as follows: 2313 2314 .vb 2315 1 2 0 | 0 3 0 | 0 4 2316 Proc0 0 5 6 | 7 0 0 | 8 0 2317 9 0 10 | 11 0 0 | 12 0 2318 ------------------------------------- 2319 13 0 14 | 15 16 17 | 0 0 2320 Proc1 0 18 0 | 19 20 21 | 0 0 2321 0 0 0 | 22 23 0 | 24 0 2322 ------------------------------------- 2323 Proc2 25 26 27 | 0 0 28 | 29 0 2324 30 0 0 | 31 32 33 | 0 34 2325 .ve 2326 2327 This can be represented as a collection of submatrices as: 2328 2329 .vb 2330 A B C 2331 D E F 2332 G H I 2333 .ve 2334 2335 Here the submatrices A,B,C are owned by proc0, D,E,F are 2336 owned by proc1, G,H,I are owned by proc2. 2337 2338 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2339 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2340 The 'M','N' parameters are 8,8, and have the same values on all procs. 2341 2342 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2343 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2344 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2345 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2346 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2347 matrix, and [DF] as another SeqAIJ matrix. 2348 2349 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2350 allocated for every row of the local diagonal submatrix, and o_nz 2351 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2352 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2353 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2354 In this case, the values of d_nz,o_nz are: 2355 .vb 2356 proc0 : d_nz = 2, o_nz = 2 2357 proc1 : d_nz = 3, o_nz = 2 2358 proc2 : d_nz = 1, o_nz = 4 2359 .ve 2360 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2361 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2362 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2363 34 values. 2364 2365 When the d_nnz, o_nnz parameters are specified, the storage is specified 2366 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2367 In the above case the values for d_nnz,o_nnz are: 2368 .vb 2369 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2370 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2371 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2372 .ve 2373 Here the space allocated is the sum of all of the above values, i.e., 34, and 2374 hence the pre-allocation is perfect. 
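   As a rough calling sketch (not part of the original manual page; it assumes A has already been
   created with type MPIAIJ, and that this is proc1 of the example above, whose per-row counts are
   the d_nnz/o_nnz values just listed), the preallocation call could look like:
.vb
      int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};   /* per-row counts on proc1 from the example */
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Since the d_nnz/o_nnz arrays are given, the scalar d_nz/o_nz arguments (here 0) are ignored.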
2375 2376 Level: intermediate 2377 2378 .keywords: matrix, aij, compressed row, sparse, parallel 2379 2380 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2381 @*/ 2382 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2383 { 2384 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2385 2386 PetscFunctionBegin; 2387 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2388 if (f) { 2389 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatCreateMPIAIJ" 2396 /*@C 2397 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2398 (the default parallel PETSc format). For good matrix assembly performance 2399 the user should preallocate the matrix storage by setting the parameters 2400 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2401 performance can be increased by more than a factor of 50. 2402 2403 Collective on MPI_Comm 2404 2405 Input Parameters: 2406 + comm - MPI communicator 2407 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2408 This value should be the same as the local size used in creating the 2409 y vector for the matrix-vector product y = Ax. 2410 . n - This value should be the same as the local size used in creating the 2411 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2412 calculated if N is given) For square matrices n is almost always m. 2413 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2414 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2415 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2416 (same value is used for all local rows) 2417 . d_nnz - array containing the number of nonzeros in the various rows of the 2418 DIAGONAL portion of the local submatrix (possibly different for each row) 2419 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2420 The size of this array is equal to the number of local rows, i.e 'm'. 2421 You must leave room for the diagonal entry even if it is zero. 2422 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2423 submatrix (same value is used for all local rows). 2424 - o_nnz - array containing the number of nonzeros in the various rows of the 2425 OFF-DIAGONAL portion of the local submatrix (possibly different for 2426 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2427 structure. The size of this array is equal to the number 2428 of local rows, i.e 'm'. 2429 2430 Output Parameter: 2431 . A - the matrix 2432 2433 Notes: 2434 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2435 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2436 storage requirements for this matrix. 2437 2438 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2439 processor than it must be used on all processors that share the object for 2440 that argument. 2441 2442 The AIJ format (also called the Yale sparse matrix format or 2443 compressed row storage), is fully compatible with standard Fortran 77 2444 storage. That is, the stored row and column indices can begin at 2445 either one (as in Fortran) or zero. See the users manual for details. 2446 2447 The user MUST specify either the local or global matrix dimensions 2448 (possibly both). 
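   For instance (an illustrative sketch, not from the original manual page; comm, m, n, and A are
   assumed to be supplied by the caller, and the per-row estimates 5 and 2 are only sample values),
   one can give only the local sizes and let the global sizes be computed:
.vb
      ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve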
2449 2450 The parallel matrix is partitioned such that the first m0 rows belong to 2451 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2452 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2453 2454 The DIAGONAL portion of the local submatrix of a processor can be defined 2455 as the submatrix which is obtained by extraction the part corresponding 2456 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2457 first row that belongs to the processor, and r2 is the last row belonging 2458 to the this processor. This is a square mxm matrix. The remaining portion 2459 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2460 2461 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2462 2463 When calling this routine with a single process communicator, a matrix of 2464 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2465 type of communicator, use the construction mechanism: 2466 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2467 2468 By default, this format uses inodes (identical nodes) when possible. 2469 We search for consecutive rows with the same nonzero structure, thereby 2470 reusing matrix information to achieve increased efficiency. 2471 2472 Options Database Keys: 2473 + -mat_aij_no_inode - Do not use inodes 2474 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2475 - -mat_aij_oneindex - Internally use indexing starting at 1 2476 rather than 0. Note that when calling MatSetValues(), 2477 the user still MUST index entries starting at 0! 2478 2479 2480 Example usage: 2481 2482 Consider the following 8x8 matrix with 34 non-zero values, that is 2483 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2484 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2485 as follows: 2486 2487 .vb 2488 1 2 0 | 0 3 0 | 0 4 2489 Proc0 0 5 6 | 7 0 0 | 8 0 2490 9 0 10 | 11 0 0 | 12 0 2491 ------------------------------------- 2492 13 0 14 | 15 16 17 | 0 0 2493 Proc1 0 18 0 | 19 20 21 | 0 0 2494 0 0 0 | 22 23 0 | 24 0 2495 ------------------------------------- 2496 Proc2 25 26 27 | 0 0 28 | 29 0 2497 30 0 0 | 31 32 33 | 0 34 2498 .ve 2499 2500 This can be represented as a collection of submatrices as: 2501 2502 .vb 2503 A B C 2504 D E F 2505 G H I 2506 .ve 2507 2508 Where the submatrices A,B,C are owned by proc0, D,E,F are 2509 owned by proc1, G,H,I are owned by proc2. 2510 2511 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2512 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2513 The 'M','N' parameters are 8,8, and have the same values on all procs. 2514 2515 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2516 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2517 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2518 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2519 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2520 matrix, ans [DF] as another SeqAIJ matrix. 2521 2522 When d_nz, o_nz parameters are specified, d_nz storage elements are 2523 allocated for every row of the local diagonal submatrix, and o_nz 2524 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2525 One way to choose d_nz and o_nz is to use the max nonzerors per local 2526 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2527 In this case, the values of d_nz,o_nz are: 2528 .vb 2529 proc0 : dnz = 2, o_nz = 2 2530 proc1 : dnz = 3, o_nz = 2 2531 proc2 : dnz = 1, o_nz = 4 2532 .ve 2533 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2534 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2535 for proc3. i.e we are using 12+15+10=37 storage locations to store 2536 34 values. 2537 2538 When d_nnz, o_nnz parameters are specified, the storage is specified 2539 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2540 In the above case the values for d_nnz,o_nnz are: 2541 .vb 2542 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2543 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2544 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2545 .ve 2546 Here the space allocated is sum of all the above values i.e 34, and 2547 hence pre-allocation is perfect. 2548 2549 Level: intermediate 2550 2551 .keywords: matrix, aij, compressed row, sparse, parallel 2552 2553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2554 @*/ 2555 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2556 { 2557 int ierr,size; 2558 2559 PetscFunctionBegin; 2560 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2561 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2562 if (size > 1) { 2563 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2564 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2565 } else { 2566 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2567 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2568 } 2569 PetscFunctionReturn(0); 2570 } 2571 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2574 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2575 { 2576 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2577 PetscFunctionBegin; 2578 *Ad = a->A; 2579 *Ao = a->B; 2580 *colmap = a->garray; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2586 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2587 { 2588 int ierr,i; 2589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2590 2591 PetscFunctionBegin; 2592 if (coloring->ctype == IS_COLORING_LOCAL) { 2593 ISColoringValue *allcolors,*colors; 2594 ISColoring ocoloring; 2595 2596 /* set coloring for diagonal portion */ 2597 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2598 2599 /* set coloring for off-diagonal portion */ 2600 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2601 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2602 for (i=0; i<a->B->n; i++) { 2603 colors[i] = allcolors[a->garray[i]]; 2604 } 2605 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2606 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2607 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2608 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2609 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2610 ISColoringValue *colors; 2611 int *larray; 2612 ISColoring ocoloring; 2613 2614 /* set coloring for diagonal portion */ 2615 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2616 for (i=0; i<a->A->n; i++) { 2617 larray[i] = i + a->cstart; 2618 } 2619 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2620 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2621 for 
(i=0; i<a->A->n; i++) { 2622 colors[i] = coloring->colors[larray[i]]; 2623 } 2624 ierr = PetscFree(larray);CHKERRQ(ierr); 2625 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2626 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2627 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2628 2629 /* set coloring for off-diagonal portion */ 2630 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2631 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2632 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2633 for (i=0; i<a->B->n; i++) { 2634 colors[i] = coloring->colors[larray[i]]; 2635 } 2636 ierr = PetscFree(larray);CHKERRQ(ierr); 2637 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2638 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2639 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2640 } else { 2641 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype); 2642 } 2643 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2649 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2650 { 2651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2652 int ierr; 2653 2654 PetscFunctionBegin; 2655 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2656 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2657 PetscFunctionReturn(0); 2658 } 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2662 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2663 { 2664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2665 int ierr; 2666 2667 PetscFunctionBegin; 2668 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2669 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 2673 #undef __FUNCT__ 2674 #define __FUNCT__ "MatMerge" 2675 /*@C 2676 MatMerge - Creates a single large PETSc matrix by concatenating the sequential 2677 matrices from each processor 2678 2679 Collective on MPI_Comm 2680 2681 Input Parameters: 2682 + comm - the communicator the parallel matrix will live on 2683 - inmat - the input sequential matrix (one per process) 2684 2685 Output Parameter: 2686 . outmat - the parallel matrix generated 2687 2688 Level: advanced 2689 2690 Notes: The number of columns of the matrix in EACH of the separate sequential matrices 2691 MUST be the same. 
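   A minimal calling sketch (illustrative only; localmat and globalmat are hypothetical names for the
   per-process sequential matrix and the resulting parallel matrix). Note that MatMerge() destroys the
   input matrix as part of the merge:
.vb
      ierr = MatMerge(PETSC_COMM_WORLD,localmat,&globalmat);CHKERRQ(ierr);
.ve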
2692 2693 @*/ 2694 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2695 { 2696 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2697 PetscScalar *values; 2698 PetscMap columnmap,rowmap; 2699 2700 PetscFunctionBegin; 2701 2702 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2703 2704 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2705 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2706 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2707 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2708 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2709 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2710 2711 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2712 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2713 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2714 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2715 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2716 2717 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2718 for (i=0;i<m;i++) { 2719 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2720 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2721 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2722 } 2723 /* This routine will ONLY return MPIAIJ type matrix */ 2724 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2725 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2726 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2727 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2728 2729 for (i=0;i<m;i++) { 2730 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2731 I = i + rstart; 2732 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2733 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2734 } 2735 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2736 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2737 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2738 2739 PetscFunctionReturn(0); 2740 } 2741 2742 #undef __FUNCT__ 2743 #define __FUNCT__ "MatFileSplit" 2744 int MatFileSplit(Mat A,char *outfile) 2745 { 2746 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2747 PetscViewer out; 2748 char *name; 2749 Mat B; 2750 PetscScalar *values; 2751 2752 PetscFunctionBegin; 2753 2754 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2755 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2756 /* Should this be the type of the diagonal block of A? 
*/ 2757 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2758 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2759 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2760 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2761 for (i=0;i<m;i++) { 2762 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2763 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2764 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2765 } 2766 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2767 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768 2769 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2770 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2771 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2772 sprintf(name,"%s.%d",outfile,rank); 2773 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2774 ierr = PetscFree(name); 2775 ierr = MatView(B,out);CHKERRQ(ierr); 2776 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2777 ierr = MatDestroy(B);CHKERRQ(ierr); 2778 PetscFunctionReturn(0); 2779 } 2780