1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 int n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 int i,j,rstart = aij->rstart,rend = aij->rend; 192 int cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 int nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 int i,j,rstart = aij->rstart,rend = aij->rend; 278 int cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 int nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 int i,j,rstart,ncols,n,flg; 351 int *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. */ 379 /* 380 if nonzero structure of submatrix B cannot change then we know that 381 no processor disassembled thus we can skip this stuff 382 */ 383 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 384 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 385 if (mat->was_assembled && !other_disassembled) { 386 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 } 389 390 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 391 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 392 } 393 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 395 396 if (aij->rowvalues) { 397 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 398 aij->rowvalues = 0; 399 } 400 401 /* used by MatAXPY() */ 402 a->xtoy = 0; b->xtoy = 0; 403 a->XtoY = 0; b->XtoY = 0; 404 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 411 { 412 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 413 PetscErrorCode ierr; 414 415 PetscFunctionBegin; 416 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 417 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatZeroRows_MPIAIJ" 423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 424 { 425 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 426 PetscErrorCode ierr; 427 int i,N,*rows,*owners = l->rowners,size = l->size; 428 int *nprocs,j,idx,nsends,row; 429 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 430 int *rvalues,tag = A->tag,count,base,slen,n,*source; 431 int *lens,imdex,*lrows,*values,rstart=l->rstart; 432 MPI_Comm comm = A->comm; 433 MPI_Request *send_waits,*recv_waits; 434 MPI_Status recv_status,*send_status; 435 IS istmp; 436 PetscTruth found; 437 438 PetscFunctionBegin; 439 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 441 442 /* first count number of contributors to each processor */ 443 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 444 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 445 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 446 for (i=0; i<N; i++) { 447 idx = rows[i]; 448 found = PETSC_FALSE; 449 for (j=0; j<size; j++) { 450 if (idx >= owners[j] && idx < owners[j+1]) { 451 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 452 } 453 } 454 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 455 } 456 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 457 458 /* inform other processors of number of messages and max length*/ 459 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 460 461 /* post receives: */ 462 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 463 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 464 for (i=0; i<nrecvs; i++) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 474 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 475 starts[0] = 0; 476 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 477 for (i=0; i<N; i++) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 481 482 starts[0] = 0; 483 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 484 count = 0; 485 for (i=0; i<size; i++) { 486 if (nprocs[2*i+1]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 511 count = 0; 512 for (i=0; i<nrecvs; i++) { 513 values = rvalues + i*nmax; 514 for (j=0; j<lens[i]; j++) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PetscLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for (i = 0; i < slen; i++) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatMult_MPIAIJ" 571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 574 PetscErrorCode ierr; 575 int nt; 576 577 PetscFunctionBegin; 578 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 579 if (nt != A->n) { 580 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 581 } 582 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 584 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMultAdd_MPIAIJ" 591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 PetscErrorCode ierr; 595 596 PetscFunctionBegin; 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 PetscErrorCode ierr; 610 611 PetscFunctionBegin; 612 /* do nondiagonal part */ 613 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 614 /* send it on its way */ 615 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 /* do local part */ 617 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 618 /* receive remote parts: note this assumes the values are not actually */ 619 /* inserted in yy until the next line, which is true for my implementation*/ 620 /* but is not perhaps always true. */ 621 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 629 { 630 MPI_Comm comm; 631 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 632 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 633 IS Me,Notme; 634 PetscErrorCode ierr; 635 int M,N,first,last,*notme,ntids,i; 636 637 PetscFunctionBegin; 638 639 /* Easy test: symmetric diagonal block */ 640 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 641 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 642 if (!*f) PetscFunctionReturn(0); 643 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 644 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 645 if (ntids==1) PetscFunctionReturn(0); 646 647 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 648 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 649 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 650 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 651 for (i=0; i<first; i++) notme[i] = i; 652 for (i=last; i<M; i++) notme[i-last+first] = i; 653 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 654 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 655 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 656 Aoff = Aoffs[0]; 657 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 658 Boff = Boffs[0]; 659 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 660 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 661 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 662 ierr = ISDestroy(Me);CHKERRQ(ierr); 663 ierr = ISDestroy(Notme);CHKERRQ(ierr); 664 665 PetscFunctionReturn(0); 666 } 667 EXTERN_C_END 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 672 { 673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 674 PetscErrorCode ierr; 675 676 PetscFunctionBegin; 677 /* do nondiagonal part */ 678 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 679 /* send it on its way */ 680 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 681 /* do local part */ 682 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 683 /* receive remote parts: note this assumes the values are not actually */ 684 /* inserted in yy until the next line, which is true for my implementation*/ 685 /* but is not perhaps always true. */ 686 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 /* 691 This only works correctly for square matrices where the subblock A->A is the 692 diagonal block 693 */ 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 697 { 698 PetscErrorCode ierr; 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 701 PetscFunctionBegin; 702 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 703 if (a->rstart != a->cstart || a->rend != a->cend) { 704 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 705 } 706 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatScale_MPIAIJ" 712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 719 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatDestroy_MPIAIJ" 725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 726 { 727 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 728 PetscErrorCode ierr; 729 730 PetscFunctionBegin; 731 #if defined(PETSC_USE_LOG) 732 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 733 #endif 734 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 735 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 736 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 737 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 738 #if defined (PETSC_USE_CTABLE) 739 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 740 #else 741 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 742 #endif 743 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 744 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 745 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 746 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 747 ierr = PetscFree(aij);CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatView_MPIAIJ_Binary" 753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 754 { 755 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 756 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 757 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 758 PetscErrorCode ierr; 759 int nz,fd,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 760 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 761 PetscScalar *column_values; 762 763 PetscFunctionBegin; 764 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 765 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 766 nz = A->nz + B->nz; 767 if (!rank) { 768 header[0] = MAT_FILE_COOKIE; 769 header[1] = mat->M; 770 header[2] = mat->N; 771 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 772 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 773 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 774 /* get largest number of rows any processor has */ 775 rlen = mat->m; 776 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 777 for (i=1; i<size; i++) { 778 rlen = PetscMax(rlen,range[i+1] - range[i]); 779 } 780 } else { 781 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 782 rlen = mat->m; 783 } 784 785 /* load up the local row counts */ 786 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 787 for (i=0; i<mat->m; i++) { 788 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 789 } 790 791 /* store the row lengths to the file */ 792 if (!rank) { 793 MPI_Status status; 794 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 795 for (i=1; i<size; i++) { 796 rlen = range[i+1] - range[i]; 797 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 798 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 799 } 800 } else { 801 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 802 } 803 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 804 805 /* load up the local column indices */ 806 nzmax = nz; /* )th processor needs space a largest processor needs */ 807 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 808 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 809 cnt = 0; 810 for (i=0; i<mat->m; i++) { 811 for (j=B->i[i]; j<B->i[i+1]; j++) { 812 if ( (col = garray[B->j[j]]) > cstart) break; 813 column_indices[cnt++] = col; 814 } 815 for (k=A->i[i]; k<A->i[i+1]; k++) { 816 column_indices[cnt++] = A->j[k] + cstart; 817 } 818 for (; j<B->i[i+1]; j++) { 819 column_indices[cnt++] = garray[B->j[j]]; 820 } 821 } 822 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 823 824 /* store the column indices to the file */ 825 if (!rank) { 826 MPI_Status status; 827 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 828 for (i=1; i<size; i++) { 829 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 830 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 831 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 832 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 833 } 834 } else { 835 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 836 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 837 } 838 ierr = PetscFree(column_indices);CHKERRQ(ierr); 839 840 /* load up the local column values */ 841 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 842 cnt = 0; 843 for (i=0; i<mat->m; i++) { 844 for (j=B->i[i]; j<B->i[i+1]; j++) { 845 if ( garray[B->j[j]] > cstart) break; 846 column_values[cnt++] = B->a[j]; 847 } 848 for (k=A->i[i]; k<A->i[i+1]; k++) { 849 column_values[cnt++] = A->a[k]; 850 } 851 for (; j<B->i[i+1]; j++) { 852 column_values[cnt++] = B->a[j]; 853 } 854 } 855 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 856 857 /* store the column values to the file */ 858 if (!rank) { 859 MPI_Status status; 860 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 861 for (i=1; i<size; i++) { 862 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 863 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 864 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 865 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 866 } 867 } else { 868 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 869 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 870 } 871 ierr = PetscFree(column_values);CHKERRQ(ierr); 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 877 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 878 { 879 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 880 PetscErrorCode ierr; 881 int rank = aij->rank,size = aij->size; 882 PetscTruth isdraw,iascii,flg,isbinary; 883 PetscViewer sviewer; 884 PetscViewerFormat format; 885 886 PetscFunctionBegin; 887 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 888 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 889 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 890 if (iascii) { 891 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 892 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 893 MatInfo info; 894 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 895 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 896 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 897 if (flg) { 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 899 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 900 } else { 901 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 902 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 903 } 904 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 905 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 906 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 907 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 908 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 909 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } else if (format == PETSC_VIEWER_ASCII_INFO) { 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 914 PetscFunctionReturn(0); 915 } 916 } else if (isbinary) { 917 if (size == 1) { 918 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 919 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 920 } else { 921 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } else if (isdraw) { 925 PetscDraw draw; 926 PetscTruth isnull; 927 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 928 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 929 } 930 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 /* assemble the entire matrix onto first processor. */ 936 Mat A; 937 Mat_SeqAIJ *Aloc; 938 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 939 PetscScalar *a; 940 941 if (!rank) { 942 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 943 } else { 944 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 945 } 946 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 947 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 948 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 949 PetscLogObjectParent(mat,A); 950 951 /* copy over the A part */ 952 Aloc = (Mat_SeqAIJ*)aij->A->data; 953 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = aij->rstart; 955 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 956 for (i=0; i<m; i++) { 957 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 958 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 959 } 960 aj = Aloc->j; 961 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 962 963 /* copy over the B part */ 964 Aloc = (Mat_SeqAIJ*)aij->B->data; 965 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = aij->rstart; 967 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 968 ct = cols; 969 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 973 } 974 ierr = PetscFree(ct);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 /* 978 Everyone has to call to draw the matrix since the graphics waits are 979 synchronized across all processors that share the PetscDraw object 980 */ 981 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 if (!rank) { 983 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 984 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 985 } 986 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 ierr = MatDestroy(A);CHKERRQ(ierr); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatView_MPIAIJ" 994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 995 { 996 PetscErrorCode ierr; 997 PetscTruth iascii,isdraw,issocket,isbinary; 998 999 PetscFunctionBegin; 1000 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1004 if (iascii || isdraw || isbinary || issocket) { 1005 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1006 } else { 1007 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 PetscScalar mone=-1.0; 1022 1023 PetscFunctionBegin; 1024 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1025 1026 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1027 1028 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1029 if (flag & SOR_ZERO_INITIAL_GUESS) { 1030 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1031 its--; 1032 } 1033 1034 while (its--) { 1035 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1037 1038 /* update rhs: bb1 = bb - B*x */ 1039 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1040 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1041 1042 /* local sweep */ 1043 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1044 CHKERRQ(ierr); 1045 } 1046 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1047 if (flag & SOR_ZERO_INITIAL_GUESS) { 1048 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1049 its--; 1050 } 1051 while (its--) { 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 1055 /* update rhs: bb1 = bb - B*x */ 1056 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1057 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1058 1059 /* local sweep */ 1060 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1061 CHKERRQ(ierr); 1062 } 1063 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1064 if (flag & SOR_ZERO_INITIAL_GUESS) { 1065 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1066 its--; 1067 } 1068 while (its--) { 1069 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 1072 /* update rhs: bb1 = bb - B*x */ 1073 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1074 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1075 1076 /* local sweep */ 1077 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1078 CHKERRQ(ierr); 1079 } 1080 } else { 1081 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1082 } 1083 1084 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1090 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1091 { 1092 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1093 Mat A = mat->A,B = mat->B; 1094 PetscErrorCode ierr; 1095 PetscReal isend[5],irecv[5]; 1096 1097 PetscFunctionBegin; 1098 info->block_size = 1.0; 1099 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1100 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1101 isend[3] = info->memory; isend[4] = info->mallocs; 1102 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1103 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1104 isend[3] += info->memory; isend[4] += info->mallocs; 1105 if (flag == MAT_LOCAL) { 1106 info->nz_used = isend[0]; 1107 info->nz_allocated = isend[1]; 1108 info->nz_unneeded = isend[2]; 1109 info->memory = isend[3]; 1110 info->mallocs = isend[4]; 1111 } else if (flag == MAT_GLOBAL_MAX) { 1112 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1113 info->nz_used = irecv[0]; 1114 info->nz_allocated = irecv[1]; 1115 info->nz_unneeded = irecv[2]; 1116 info->memory = irecv[3]; 1117 info->mallocs = irecv[4]; 1118 } else if (flag == MAT_GLOBAL_SUM) { 1119 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1120 info->nz_used = irecv[0]; 1121 info->nz_allocated = irecv[1]; 1122 info->nz_unneeded = irecv[2]; 1123 info->memory = irecv[3]; 1124 info->mallocs = irecv[4]; 1125 } 1126 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1127 info->fill_ratio_needed = 0; 1128 info->factor_mallocs = 0; 1129 info->rows_global = (double)matin->M; 1130 info->columns_global = (double)matin->N; 1131 info->rows_local = (double)matin->m; 1132 info->columns_local = (double)matin->N; 1133 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatSetOption_MPIAIJ" 1139 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1140 { 1141 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 switch (op) { 1146 case MAT_NO_NEW_NONZERO_LOCATIONS: 1147 case MAT_YES_NEW_NONZERO_LOCATIONS: 1148 case MAT_COLUMNS_UNSORTED: 1149 case MAT_COLUMNS_SORTED: 1150 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1151 case MAT_KEEP_ZEROED_ROWS: 1152 case MAT_NEW_NONZERO_LOCATION_ERR: 1153 case MAT_USE_INODES: 1154 case MAT_DO_NOT_USE_INODES: 1155 case MAT_IGNORE_ZERO_ENTRIES: 1156 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1157 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1158 break; 1159 case MAT_ROW_ORIENTED: 1160 a->roworiented = PETSC_TRUE; 1161 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1162 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1163 break; 1164 case MAT_ROWS_SORTED: 1165 case MAT_ROWS_UNSORTED: 1166 case MAT_YES_NEW_DIAGONALS: 1167 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1168 break; 1169 case MAT_COLUMN_ORIENTED: 1170 a->roworiented = PETSC_FALSE; 1171 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1172 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1173 break; 1174 case MAT_IGNORE_OFF_PROC_ENTRIES: 1175 a->donotstash = PETSC_TRUE; 1176 break; 1177 case MAT_NO_NEW_DIAGONALS: 1178 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1179 case MAT_SYMMETRIC: 1180 case MAT_STRUCTURALLY_SYMMETRIC: 1181 case MAT_HERMITIAN: 1182 case MAT_SYMMETRY_ETERNAL: 1183 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1184 break; 1185 case MAT_NOT_SYMMETRIC: 1186 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1187 case MAT_NOT_HERMITIAN: 1188 case MAT_NOT_SYMMETRY_ETERNAL: 1189 break; 1190 default: 1191 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1192 } 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatGetRow_MPIAIJ" 1198 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1199 { 1200 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1201 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1202 PetscErrorCode ierr; 1203 int i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1204 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1205 int *cmap,*idx_p; 1206 1207 PetscFunctionBegin; 1208 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1209 mat->getrowactive = PETSC_TRUE; 1210 1211 if (!mat->rowvalues && (idx || v)) { 1212 /* 1213 allocate enough space to hold information from the longest row. 1214 */ 1215 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1216 int max = 1,tmp; 1217 for (i=0; i<matin->m; i++) { 1218 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1219 if (max < tmp) { max = tmp; } 1220 } 1221 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1222 mat->rowindices = (int*)(mat->rowvalues + max); 1223 } 1224 1225 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1226 lrow = row - rstart; 1227 1228 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1229 if (!v) {pvA = 0; pvB = 0;} 1230 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1231 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1232 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1233 nztot = nzA + nzB; 1234 1235 cmap = mat->garray; 1236 if (v || idx) { 1237 if (nztot) { 1238 /* Sort by increasing column numbers, assuming A and B already sorted */ 1239 int imark = -1; 1240 if (v) { 1241 *v = v_p = mat->rowvalues; 1242 for (i=0; i<nzB; i++) { 1243 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1244 else break; 1245 } 1246 imark = i; 1247 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1248 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1249 } 1250 if (idx) { 1251 *idx = idx_p = mat->rowindices; 1252 if (imark > -1) { 1253 for (i=0; i<imark; i++) { 1254 idx_p[i] = cmap[cworkB[i]]; 1255 } 1256 } else { 1257 for (i=0; i<nzB; i++) { 1258 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1259 else break; 1260 } 1261 imark = i; 1262 } 1263 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1264 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1265 } 1266 } else { 1267 if (idx) *idx = 0; 1268 if (v) *v = 0; 1269 } 1270 } 1271 *nz = nztot; 1272 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1273 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1279 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1280 { 1281 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1282 1283 PetscFunctionBegin; 1284 if (aij->getrowactive == PETSC_FALSE) { 1285 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1286 } 1287 aij->getrowactive = PETSC_FALSE; 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatNorm_MPIAIJ" 1293 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1294 { 1295 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1296 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1297 PetscErrorCode ierr; 1298 int i,j,cstart = aij->cstart; 1299 PetscReal sum = 0.0; 1300 PetscScalar *v; 1301 1302 PetscFunctionBegin; 1303 if (aij->size == 1) { 1304 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1305 } else { 1306 if (type == NORM_FROBENIUS) { 1307 v = amat->a; 1308 for (i=0; i<amat->nz; i++) { 1309 #if defined(PETSC_USE_COMPLEX) 1310 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1311 #else 1312 sum += (*v)*(*v); v++; 1313 #endif 1314 } 1315 v = bmat->a; 1316 for (i=0; i<bmat->nz; i++) { 1317 #if defined(PETSC_USE_COMPLEX) 1318 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1319 #else 1320 sum += (*v)*(*v); v++; 1321 #endif 1322 } 1323 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1324 *norm = sqrt(*norm); 1325 } else if (type == NORM_1) { /* max column norm */ 1326 PetscReal *tmp,*tmp2; 1327 int *jj,*garray = aij->garray; 1328 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1329 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1330 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1331 *norm = 0.0; 1332 v = amat->a; jj = amat->j; 1333 for (j=0; j<amat->nz; j++) { 1334 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1335 } 1336 v = bmat->a; jj = bmat->j; 1337 for (j=0; j<bmat->nz; j++) { 1338 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1339 } 1340 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1341 for (j=0; j<mat->N; j++) { 1342 if (tmp2[j] > *norm) *norm = tmp2[j]; 1343 } 1344 ierr = PetscFree(tmp);CHKERRQ(ierr); 1345 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1346 } else if (type == NORM_INFINITY) { /* max row norm */ 1347 PetscReal ntemp = 0.0; 1348 for (j=0; j<aij->A->m; j++) { 1349 v = amat->a + amat->i[j]; 1350 sum = 0.0; 1351 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1352 sum += PetscAbsScalar(*v); v++; 1353 } 1354 v = bmat->a + bmat->i[j]; 1355 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1356 sum += PetscAbsScalar(*v); v++; 1357 } 1358 if (sum > ntemp) ntemp = sum; 1359 } 1360 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1361 } else { 1362 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1363 } 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatTranspose_MPIAIJ" 1370 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1371 { 1372 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1373 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1374 PetscErrorCode ierr; 1375 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1376 Mat B; 1377 PetscScalar *array; 1378 1379 PetscFunctionBegin; 1380 if (!matout && M != N) { 1381 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1382 } 1383 1384 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1385 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1386 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1387 1388 /* copy over the A part */ 1389 Aloc = (Mat_SeqAIJ*)a->A->data; 1390 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1391 row = a->rstart; 1392 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1393 for (i=0; i<m; i++) { 1394 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1395 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1396 } 1397 aj = Aloc->j; 1398 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1399 1400 /* copy over the B part */ 1401 Aloc = (Mat_SeqAIJ*)a->B->data; 1402 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1403 row = a->rstart; 1404 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1405 ct = cols; 1406 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1407 for (i=0; i<m; i++) { 1408 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1409 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1410 } 1411 ierr = PetscFree(ct);CHKERRQ(ierr); 1412 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1413 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1414 if (matout) { 1415 *matout = B; 1416 } else { 1417 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1418 } 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1424 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1425 { 1426 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1427 Mat a = aij->A,b = aij->B; 1428 PetscErrorCode ierr; 1429 int s1,s2,s3; 1430 1431 PetscFunctionBegin; 1432 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1433 if (rr) { 1434 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1435 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1436 /* Overlap communication with computation. */ 1437 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1438 } 1439 if (ll) { 1440 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1441 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1442 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1443 } 1444 /* scale the diagonal block */ 1445 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1446 1447 if (rr) { 1448 /* Do a scatter end and then right scale the off-diagonal block */ 1449 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1450 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1451 } 1452 1453 PetscFunctionReturn(0); 1454 } 1455 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1459 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1460 { 1461 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1462 PetscErrorCode ierr; 1463 1464 PetscFunctionBegin; 1465 if (!a->rank) { 1466 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1473 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1474 { 1475 PetscFunctionBegin; 1476 *bs = 1; 1477 PetscFunctionReturn(0); 1478 } 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1481 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1482 { 1483 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1484 PetscErrorCode ierr; 1485 1486 PetscFunctionBegin; 1487 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1488 PetscFunctionReturn(0); 1489 } 1490 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "MatEqual_MPIAIJ" 1493 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1494 { 1495 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1496 Mat a,b,c,d; 1497 PetscTruth flg; 1498 PetscErrorCode ierr; 1499 1500 PetscFunctionBegin; 1501 a = matA->A; b = matA->B; 1502 c = matB->A; d = matB->B; 1503 1504 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1505 if (flg == PETSC_TRUE) { 1506 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1507 } 1508 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatCopy_MPIAIJ" 1514 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1515 { 1516 PetscErrorCode ierr; 1517 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1518 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1519 1520 PetscFunctionBegin; 1521 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1522 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1523 /* because of the column compression in the off-processor part of the matrix a->B, 1524 the number of columns in a->B and b->B may be different, hence we cannot call 1525 the MatCopy() directly on the two parts. If need be, we can provide a more 1526 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1527 then copying the submatrices */ 1528 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1529 } else { 1530 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1531 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1538 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1539 { 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #include "petscblaslapack.h" 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatAXPY_MPIAIJ" 1550 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1551 { 1552 PetscErrorCode ierr; 1553 int i; 1554 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1555 PetscBLASInt bnz,one=1; 1556 Mat_SeqAIJ *x,*y; 1557 1558 PetscFunctionBegin; 1559 if (str == SAME_NONZERO_PATTERN) { 1560 x = (Mat_SeqAIJ *)xx->A->data; 1561 y = (Mat_SeqAIJ *)yy->A->data; 1562 bnz = (PetscBLASInt)x->nz; 1563 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1564 x = (Mat_SeqAIJ *)xx->B->data; 1565 y = (Mat_SeqAIJ *)yy->B->data; 1566 bnz = (PetscBLASInt)x->nz; 1567 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1568 } else if (str == SUBSET_NONZERO_PATTERN) { 1569 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1570 1571 x = (Mat_SeqAIJ *)xx->B->data; 1572 y = (Mat_SeqAIJ *)yy->B->data; 1573 if (y->xtoy && y->XtoY != xx->B) { 1574 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1575 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1576 } 1577 if (!y->xtoy) { /* get xtoy */ 1578 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1579 y->XtoY = xx->B; 1580 } 1581 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1582 } else { 1583 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 /* -------------------------------------------------------------------*/ 1589 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1590 MatGetRow_MPIAIJ, 1591 MatRestoreRow_MPIAIJ, 1592 MatMult_MPIAIJ, 1593 /* 4*/ MatMultAdd_MPIAIJ, 1594 MatMultTranspose_MPIAIJ, 1595 MatMultTransposeAdd_MPIAIJ, 1596 0, 1597 0, 1598 0, 1599 /*10*/ 0, 1600 0, 1601 0, 1602 MatRelax_MPIAIJ, 1603 MatTranspose_MPIAIJ, 1604 /*15*/ MatGetInfo_MPIAIJ, 1605 MatEqual_MPIAIJ, 1606 MatGetDiagonal_MPIAIJ, 1607 MatDiagonalScale_MPIAIJ, 1608 MatNorm_MPIAIJ, 1609 /*20*/ MatAssemblyBegin_MPIAIJ, 1610 MatAssemblyEnd_MPIAIJ, 1611 0, 1612 MatSetOption_MPIAIJ, 1613 MatZeroEntries_MPIAIJ, 1614 /*25*/ MatZeroRows_MPIAIJ, 1615 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1616 MatLUFactorSymbolic_MPIAIJ_TFS, 1617 #else 1618 0, 1619 #endif 1620 0, 1621 0, 1622 0, 1623 /*30*/ MatSetUpPreallocation_MPIAIJ, 1624 0, 1625 0, 1626 0, 1627 0, 1628 /*35*/ MatDuplicate_MPIAIJ, 1629 0, 1630 0, 1631 0, 1632 0, 1633 /*40*/ MatAXPY_MPIAIJ, 1634 MatGetSubMatrices_MPIAIJ, 1635 MatIncreaseOverlap_MPIAIJ, 1636 MatGetValues_MPIAIJ, 1637 MatCopy_MPIAIJ, 1638 /*45*/ MatPrintHelp_MPIAIJ, 1639 MatScale_MPIAIJ, 1640 0, 1641 0, 1642 0, 1643 /*50*/ MatGetBlockSize_MPIAIJ, 1644 0, 1645 0, 1646 0, 1647 0, 1648 /*55*/ MatFDColoringCreate_MPIAIJ, 1649 0, 1650 MatSetUnfactored_MPIAIJ, 1651 0, 1652 0, 1653 /*60*/ MatGetSubMatrix_MPIAIJ, 1654 MatDestroy_MPIAIJ, 1655 MatView_MPIAIJ, 1656 MatGetPetscMaps_Petsc, 1657 0, 1658 /*65*/ 0, 1659 0, 1660 0, 1661 0, 1662 0, 1663 /*70*/ 0, 1664 0, 1665 MatSetColoring_MPIAIJ, 1666 MatSetValuesAdic_MPIAIJ, 1667 MatSetValuesAdifor_MPIAIJ, 1668 /*75*/ 0, 1669 0, 1670 0, 1671 0, 1672 0, 1673 /*80*/ 0, 1674 0, 1675 0, 1676 0, 1677 /*84*/ MatLoad_MPIAIJ, 1678 0, 1679 0, 1680 0, 1681 0, 1682 /*89*/ 0, 1683 MatMatMult_MPIAIJ_MPIAIJ, 1684 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1685 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1686 }; 1687 1688 /* ----------------------------------------------------------------------------------------*/ 1689 1690 EXTERN_C_BEGIN 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1693 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1694 { 1695 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1700 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 EXTERN_C_END 1704 1705 EXTERN_C_BEGIN 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1708 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1709 { 1710 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1711 PetscErrorCode ierr; 1712 1713 PetscFunctionBegin; 1714 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1715 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 EXTERN_C_END 1719 1720 #include "petscpc.h" 1721 EXTERN_C_BEGIN 1722 #undef __FUNCT__ 1723 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1724 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1725 { 1726 Mat_MPIAIJ *b; 1727 PetscErrorCode ierr; 1728 int i; 1729 1730 PetscFunctionBegin; 1731 B->preallocated = PETSC_TRUE; 1732 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1733 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1734 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1735 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1736 if (d_nnz) { 1737 for (i=0; i<B->m; i++) { 1738 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1739 } 1740 } 1741 if (o_nnz) { 1742 for (i=0; i<B->m; i++) { 1743 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1744 } 1745 } 1746 b = (Mat_MPIAIJ*)B->data; 1747 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1748 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1749 1750 PetscFunctionReturn(0); 1751 } 1752 EXTERN_C_END 1753 1754 /*MC 1755 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1756 1757 Options Database Keys: 1758 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1759 1760 Level: beginner 1761 1762 .seealso: MatCreateMPIAIJ 1763 M*/ 1764 1765 EXTERN_C_BEGIN 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatCreate_MPIAIJ" 1768 PetscErrorCode MatCreate_MPIAIJ(Mat B) 1769 { 1770 Mat_MPIAIJ *b; 1771 PetscErrorCode ierr; 1772 int i,size; 1773 1774 PetscFunctionBegin; 1775 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1776 1777 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1778 B->data = (void*)b; 1779 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1780 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1781 B->factor = 0; 1782 B->assembled = PETSC_FALSE; 1783 B->mapping = 0; 1784 1785 B->insertmode = NOT_SET_VALUES; 1786 b->size = size; 1787 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1788 1789 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1790 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1791 1792 /* the information in the maps duplicates the information computed below, eventually 1793 we should remove the duplicate information that is not contained in the maps */ 1794 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1795 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1796 1797 /* build local table of row and column ownerships */ 1798 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1799 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1800 b->cowners = b->rowners + b->size + 2; 1801 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1802 b->rowners[0] = 0; 1803 for (i=2; i<=b->size; i++) { 1804 b->rowners[i] += b->rowners[i-1]; 1805 } 1806 b->rstart = b->rowners[b->rank]; 1807 b->rend = b->rowners[b->rank+1]; 1808 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1809 b->cowners[0] = 0; 1810 for (i=2; i<=b->size; i++) { 1811 b->cowners[i] += b->cowners[i-1]; 1812 } 1813 b->cstart = b->cowners[b->rank]; 1814 b->cend = b->cowners[b->rank+1]; 1815 1816 /* build cache for off array entries formed */ 1817 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1818 b->donotstash = PETSC_FALSE; 1819 b->colmap = 0; 1820 b->garray = 0; 1821 b->roworiented = PETSC_TRUE; 1822 1823 /* stuff used for matrix vector multiply */ 1824 b->lvec = PETSC_NULL; 1825 b->Mvctx = PETSC_NULL; 1826 1827 /* stuff for MatGetRow() */ 1828 b->rowindices = 0; 1829 b->rowvalues = 0; 1830 b->getrowactive = PETSC_FALSE; 1831 1832 /* Explicitly create 2 MATSEQAIJ matrices. */ 1833 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1834 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1835 PetscLogObjectParent(B,b->A); 1836 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1837 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1838 PetscLogObjectParent(B,b->B); 1839 1840 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1841 "MatStoreValues_MPIAIJ", 1842 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1843 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1844 "MatRetrieveValues_MPIAIJ", 1845 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1847 "MatGetDiagonalBlock_MPIAIJ", 1848 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1849 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1850 "MatIsTranspose_MPIAIJ", 1851 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1852 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1853 "MatMPIAIJSetPreallocation_MPIAIJ", 1854 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1856 "MatDiagonalScaleLocal_MPIAIJ", 1857 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 EXTERN_C_END 1861 1862 /*MC 1863 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1864 1865 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1866 and MATMPIAIJ otherwise. As a result, for single process communicators, 1867 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1868 for communicators controlling multiple processes. It is recommended that you call both of 1869 the above preallocation routines for simplicity. 1870 1871 Options Database Keys: 1872 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1873 1874 Level: beginner 1875 1876 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1877 M*/ 1878 1879 EXTERN_C_BEGIN 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatCreate_AIJ" 1882 PetscErrorCode MatCreate_AIJ(Mat A) 1883 { 1884 PetscErrorCode ierr; 1885 int size; 1886 1887 PetscFunctionBegin; 1888 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1889 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1890 if (size == 1) { 1891 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1892 } else { 1893 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 EXTERN_C_END 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1901 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1902 { 1903 Mat mat; 1904 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 *newmat = 0; 1909 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1910 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1911 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1912 a = (Mat_MPIAIJ*)mat->data; 1913 1914 mat->factor = matin->factor; 1915 mat->assembled = PETSC_TRUE; 1916 mat->insertmode = NOT_SET_VALUES; 1917 mat->preallocated = PETSC_TRUE; 1918 1919 a->rstart = oldmat->rstart; 1920 a->rend = oldmat->rend; 1921 a->cstart = oldmat->cstart; 1922 a->cend = oldmat->cend; 1923 a->size = oldmat->size; 1924 a->rank = oldmat->rank; 1925 a->donotstash = oldmat->donotstash; 1926 a->roworiented = oldmat->roworiented; 1927 a->rowindices = 0; 1928 a->rowvalues = 0; 1929 a->getrowactive = PETSC_FALSE; 1930 1931 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1932 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1933 if (oldmat->colmap) { 1934 #if defined (PETSC_USE_CTABLE) 1935 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1936 #else 1937 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1938 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1939 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1940 #endif 1941 } else a->colmap = 0; 1942 if (oldmat->garray) { 1943 int len; 1944 len = oldmat->B->n; 1945 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1946 PetscLogObjectMemory(mat,len*sizeof(int)); 1947 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1948 } else a->garray = 0; 1949 1950 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1951 PetscLogObjectParent(mat,a->lvec); 1952 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1953 PetscLogObjectParent(mat,a->Mvctx); 1954 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1955 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1956 PetscLogObjectParent(mat,a->A); 1957 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1958 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1959 PetscLogObjectParent(mat,a->B); 1960 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1961 *newmat = mat; 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #include "petscsys.h" 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatLoad_MPIAIJ" 1969 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1970 { 1971 Mat A; 1972 PetscScalar *vals,*svals; 1973 MPI_Comm comm = ((PetscObject)viewer)->comm; 1974 MPI_Status status; 1975 PetscErrorCode ierr; 1976 int i,nz,j,rstart,rend,fd; 1977 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1978 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1979 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1980 1981 PetscFunctionBegin; 1982 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1983 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1984 if (!rank) { 1985 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1986 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1987 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1988 if (header[3] < 0) { 1989 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1990 } 1991 } 1992 1993 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1994 M = header[1]; N = header[2]; 1995 /* determine ownership of all rows */ 1996 m = M/size + ((M % size) > rank); 1997 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1998 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1999 rowners[0] = 0; 2000 for (i=2; i<=size; i++) { 2001 rowners[i] += rowners[i-1]; 2002 } 2003 rstart = rowners[rank]; 2004 rend = rowners[rank+1]; 2005 2006 /* distribute row lengths to all processors */ 2007 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 2008 offlens = ourlens + (rend-rstart); 2009 if (!rank) { 2010 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 2011 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2012 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2013 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2014 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2015 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2016 } else { 2017 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2018 } 2019 2020 if (!rank) { 2021 /* calculate the number of nonzeros on each processor */ 2022 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2023 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2024 for (i=0; i<size; i++) { 2025 for (j=rowners[i]; j< rowners[i+1]; j++) { 2026 procsnz[i] += rowlengths[j]; 2027 } 2028 } 2029 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2030 2031 /* determine max buffer needed and allocate it */ 2032 maxnz = 0; 2033 for (i=0; i<size; i++) { 2034 maxnz = PetscMax(maxnz,procsnz[i]); 2035 } 2036 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2037 2038 /* read in my part of the matrix column indices */ 2039 nz = procsnz[0]; 2040 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2041 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2042 2043 /* read in every one elses and ship off */ 2044 for (i=1; i<size; i++) { 2045 nz = procsnz[i]; 2046 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2047 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2048 } 2049 ierr = PetscFree(cols);CHKERRQ(ierr); 2050 } else { 2051 /* determine buffer space needed for message */ 2052 nz = 0; 2053 for (i=0; i<m; i++) { 2054 nz += ourlens[i]; 2055 } 2056 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2057 2058 /* receive message of column indices*/ 2059 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2060 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2061 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2062 } 2063 2064 /* determine column ownership if matrix is not square */ 2065 if (N != M) { 2066 n = N/size + ((N % size) > rank); 2067 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2068 cstart = cend - n; 2069 } else { 2070 cstart = rstart; 2071 cend = rend; 2072 n = cend - cstart; 2073 } 2074 2075 /* loop over local rows, determining number of off diagonal entries */ 2076 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2077 jj = 0; 2078 for (i=0; i<m; i++) { 2079 for (j=0; j<ourlens[i]; j++) { 2080 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2081 jj++; 2082 } 2083 } 2084 2085 /* create our matrix */ 2086 for (i=0; i<m; i++) { 2087 ourlens[i] -= offlens[i]; 2088 } 2089 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2090 ierr = MatSetType(A,type);CHKERRQ(ierr); 2091 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2092 2093 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2094 for (i=0; i<m; i++) { 2095 ourlens[i] += offlens[i]; 2096 } 2097 2098 if (!rank) { 2099 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2100 2101 /* read in my part of the matrix numerical values */ 2102 nz = procsnz[0]; 2103 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2104 2105 /* insert into matrix */ 2106 jj = rstart; 2107 smycols = mycols; 2108 svals = vals; 2109 for (i=0; i<m; i++) { 2110 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2111 smycols += ourlens[i]; 2112 svals += ourlens[i]; 2113 jj++; 2114 } 2115 2116 /* read in other processors and ship out */ 2117 for (i=1; i<size; i++) { 2118 nz = procsnz[i]; 2119 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2120 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2121 } 2122 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2123 } else { 2124 /* receive numeric values */ 2125 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2126 2127 /* receive message of values*/ 2128 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2129 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2130 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2131 2132 /* insert into matrix */ 2133 jj = rstart; 2134 smycols = mycols; 2135 svals = vals; 2136 for (i=0; i<m; i++) { 2137 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2138 smycols += ourlens[i]; 2139 svals += ourlens[i]; 2140 jj++; 2141 } 2142 } 2143 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2144 ierr = PetscFree(vals);CHKERRQ(ierr); 2145 ierr = PetscFree(mycols);CHKERRQ(ierr); 2146 ierr = PetscFree(rowners);CHKERRQ(ierr); 2147 2148 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2149 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2150 *newmat = A; 2151 PetscFunctionReturn(0); 2152 } 2153 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2156 /* 2157 Not great since it makes two copies of the submatrix, first an SeqAIJ 2158 in local and then by concatenating the local matrices the end result. 2159 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2160 */ 2161 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2162 { 2163 PetscErrorCode ierr; 2164 int i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2165 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2166 Mat *local,M,Mreuse; 2167 PetscScalar *vwork,*aa; 2168 MPI_Comm comm = mat->comm; 2169 Mat_SeqAIJ *aij; 2170 2171 2172 PetscFunctionBegin; 2173 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2175 2176 if (call == MAT_REUSE_MATRIX) { 2177 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2178 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2179 local = &Mreuse; 2180 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2181 } else { 2182 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2183 Mreuse = *local; 2184 ierr = PetscFree(local);CHKERRQ(ierr); 2185 } 2186 2187 /* 2188 m - number of local rows 2189 n - number of columns (same on all processors) 2190 rstart - first row in new global matrix generated 2191 */ 2192 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2193 if (call == MAT_INITIAL_MATRIX) { 2194 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2195 ii = aij->i; 2196 jj = aij->j; 2197 2198 /* 2199 Determine the number of non-zeros in the diagonal and off-diagonal 2200 portions of the matrix in order to do correct preallocation 2201 */ 2202 2203 /* first get start and end of "diagonal" columns */ 2204 if (csize == PETSC_DECIDE) { 2205 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2206 if (mglobal == n) { /* square matrix */ 2207 nlocal = m; 2208 } else { 2209 nlocal = n/size + ((n % size) > rank); 2210 } 2211 } else { 2212 nlocal = csize; 2213 } 2214 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2215 rstart = rend - nlocal; 2216 if (rank == size - 1 && rend != n) { 2217 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2218 } 2219 2220 /* next, compute all the lengths */ 2221 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2222 olens = dlens + m; 2223 for (i=0; i<m; i++) { 2224 jend = ii[i+1] - ii[i]; 2225 olen = 0; 2226 dlen = 0; 2227 for (j=0; j<jend; j++) { 2228 if (*jj < rstart || *jj >= rend) olen++; 2229 else dlen++; 2230 jj++; 2231 } 2232 olens[i] = olen; 2233 dlens[i] = dlen; 2234 } 2235 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2236 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2237 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2238 ierr = PetscFree(dlens);CHKERRQ(ierr); 2239 } else { 2240 int ml,nl; 2241 2242 M = *newmat; 2243 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2244 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2245 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2246 /* 2247 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2248 rather than the slower MatSetValues(). 2249 */ 2250 M->was_assembled = PETSC_TRUE; 2251 M->assembled = PETSC_FALSE; 2252 } 2253 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2254 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2255 ii = aij->i; 2256 jj = aij->j; 2257 aa = aij->a; 2258 for (i=0; i<m; i++) { 2259 row = rstart + i; 2260 nz = ii[i+1] - ii[i]; 2261 cwork = jj; jj += nz; 2262 vwork = aa; aa += nz; 2263 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2264 } 2265 2266 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2267 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2268 *newmat = M; 2269 2270 /* save submatrix used in processor for next request */ 2271 if (call == MAT_INITIAL_MATRIX) { 2272 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2273 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2274 } 2275 2276 PetscFunctionReturn(0); 2277 } 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2281 /*@C 2282 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2283 (the default parallel PETSc format). For good matrix assembly performance 2284 the user should preallocate the matrix storage by setting the parameters 2285 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2286 performance can be increased by more than a factor of 50. 2287 2288 Collective on MPI_Comm 2289 2290 Input Parameters: 2291 + A - the matrix 2292 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2293 (same value is used for all local rows) 2294 . d_nnz - array containing the number of nonzeros in the various rows of the 2295 DIAGONAL portion of the local submatrix (possibly different for each row) 2296 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2297 The size of this array is equal to the number of local rows, i.e 'm'. 2298 You must leave room for the diagonal entry even if it is zero. 2299 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2300 submatrix (same value is used for all local rows). 2301 - o_nnz - array containing the number of nonzeros in the various rows of the 2302 OFF-DIAGONAL portion of the local submatrix (possibly different for 2303 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2304 structure. The size of this array is equal to the number 2305 of local rows, i.e 'm'. 2306 2307 The AIJ format (also called the Yale sparse matrix format or 2308 compressed row storage), is fully compatible with standard Fortran 77 2309 storage. That is, the stored row and column indices can begin at 2310 either one (as in Fortran) or zero. See the users manual for details. 2311 2312 The user MUST specify either the local or global matrix dimensions 2313 (possibly both). 2314 2315 The parallel matrix is partitioned such that the first m0 rows belong to 2316 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2317 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2318 2319 The DIAGONAL portion of the local submatrix of a processor can be defined 2320 as the submatrix which is obtained by extraction the part corresponding 2321 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2322 first row that belongs to the processor, and r2 is the last row belonging 2323 to the this processor. This is a square mxm matrix. The remaining portion 2324 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2325 2326 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2327 2328 By default, this format uses inodes (identical nodes) when possible. 2329 We search for consecutive rows with the same nonzero structure, thereby 2330 reusing matrix information to achieve increased efficiency. 2331 2332 Options Database Keys: 2333 + -mat_aij_no_inode - Do not use inodes 2334 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2335 - -mat_aij_oneindex - Internally use indexing starting at 1 2336 rather than 0. Note that when calling MatSetValues(), 2337 the user still MUST index entries starting at 0! 2338 2339 Example usage: 2340 2341 Consider the following 8x8 matrix with 34 non-zero values, that is 2342 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2343 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2344 as follows: 2345 2346 .vb 2347 1 2 0 | 0 3 0 | 0 4 2348 Proc0 0 5 6 | 7 0 0 | 8 0 2349 9 0 10 | 11 0 0 | 12 0 2350 ------------------------------------- 2351 13 0 14 | 15 16 17 | 0 0 2352 Proc1 0 18 0 | 19 20 21 | 0 0 2353 0 0 0 | 22 23 0 | 24 0 2354 ------------------------------------- 2355 Proc2 25 26 27 | 0 0 28 | 29 0 2356 30 0 0 | 31 32 33 | 0 34 2357 .ve 2358 2359 This can be represented as a collection of submatrices as: 2360 2361 .vb 2362 A B C 2363 D E F 2364 G H I 2365 .ve 2366 2367 Where the submatrices A,B,C are owned by proc0, D,E,F are 2368 owned by proc1, G,H,I are owned by proc2. 2369 2370 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2371 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2372 The 'M','N' parameters are 8,8, and have the same values on all procs. 2373 2374 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2375 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2376 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2377 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2378 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2379 matrix, ans [DF] as another SeqAIJ matrix. 2380 2381 When d_nz, o_nz parameters are specified, d_nz storage elements are 2382 allocated for every row of the local diagonal submatrix, and o_nz 2383 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2384 One way to choose d_nz and o_nz is to use the max nonzerors per local 2385 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2386 In this case, the values of d_nz,o_nz are: 2387 .vb 2388 proc0 : dnz = 2, o_nz = 2 2389 proc1 : dnz = 3, o_nz = 2 2390 proc2 : dnz = 1, o_nz = 4 2391 .ve 2392 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2393 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2394 for proc3. i.e we are using 12+15+10=37 storage locations to store 2395 34 values. 2396 2397 When d_nnz, o_nnz parameters are specified, the storage is specified 2398 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2399 In the above case the values for d_nnz,o_nnz are: 2400 .vb 2401 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2402 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2403 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2404 .ve 2405 Here the space allocated is sum of all the above values i.e 34, and 2406 hence pre-allocation is perfect. 2407 2408 Level: intermediate 2409 2410 .keywords: matrix, aij, compressed row, sparse, parallel 2411 2412 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2413 @*/ 2414 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2415 { 2416 PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]); 2417 2418 PetscFunctionBegin; 2419 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2420 if (f) { 2421 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2422 } 2423 PetscFunctionReturn(0); 2424 } 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatCreateMPIAIJ" 2428 /*@C 2429 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2430 (the default parallel PETSc format). For good matrix assembly performance 2431 the user should preallocate the matrix storage by setting the parameters 2432 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2433 performance can be increased by more than a factor of 50. 2434 2435 Collective on MPI_Comm 2436 2437 Input Parameters: 2438 + comm - MPI communicator 2439 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2440 This value should be the same as the local size used in creating the 2441 y vector for the matrix-vector product y = Ax. 2442 . n - This value should be the same as the local size used in creating the 2443 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2444 calculated if N is given) For square matrices n is almost always m. 2445 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2446 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2447 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2448 (same value is used for all local rows) 2449 . d_nnz - array containing the number of nonzeros in the various rows of the 2450 DIAGONAL portion of the local submatrix (possibly different for each row) 2451 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2452 The size of this array is equal to the number of local rows, i.e 'm'. 2453 You must leave room for the diagonal entry even if it is zero. 2454 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2455 submatrix (same value is used for all local rows). 2456 - o_nnz - array containing the number of nonzeros in the various rows of the 2457 OFF-DIAGONAL portion of the local submatrix (possibly different for 2458 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2459 structure. The size of this array is equal to the number 2460 of local rows, i.e 'm'. 2461 2462 Output Parameter: 2463 . A - the matrix 2464 2465 Notes: 2466 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2467 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2468 storage requirements for this matrix. 2469 2470 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2471 processor than it must be used on all processors that share the object for 2472 that argument. 2473 2474 The AIJ format (also called the Yale sparse matrix format or 2475 compressed row storage), is fully compatible with standard Fortran 77 2476 storage. That is, the stored row and column indices can begin at 2477 either one (as in Fortran) or zero. See the users manual for details. 2478 2479 The user MUST specify either the local or global matrix dimensions 2480 (possibly both). 2481 2482 The parallel matrix is partitioned such that the first m0 rows belong to 2483 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2484 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2485 2486 The DIAGONAL portion of the local submatrix of a processor can be defined 2487 as the submatrix which is obtained by extraction the part corresponding 2488 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2489 first row that belongs to the processor, and r2 is the last row belonging 2490 to the this processor. This is a square mxm matrix. The remaining portion 2491 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2492 2493 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2494 2495 When calling this routine with a single process communicator, a matrix of 2496 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2497 type of communicator, use the construction mechanism: 2498 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2499 2500 By default, this format uses inodes (identical nodes) when possible. 2501 We search for consecutive rows with the same nonzero structure, thereby 2502 reusing matrix information to achieve increased efficiency. 2503 2504 Options Database Keys: 2505 + -mat_aij_no_inode - Do not use inodes 2506 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2507 - -mat_aij_oneindex - Internally use indexing starting at 1 2508 rather than 0. Note that when calling MatSetValues(), 2509 the user still MUST index entries starting at 0! 2510 2511 2512 Example usage: 2513 2514 Consider the following 8x8 matrix with 34 non-zero values, that is 2515 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2516 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2517 as follows: 2518 2519 .vb 2520 1 2 0 | 0 3 0 | 0 4 2521 Proc0 0 5 6 | 7 0 0 | 8 0 2522 9 0 10 | 11 0 0 | 12 0 2523 ------------------------------------- 2524 13 0 14 | 15 16 17 | 0 0 2525 Proc1 0 18 0 | 19 20 21 | 0 0 2526 0 0 0 | 22 23 0 | 24 0 2527 ------------------------------------- 2528 Proc2 25 26 27 | 0 0 28 | 29 0 2529 30 0 0 | 31 32 33 | 0 34 2530 .ve 2531 2532 This can be represented as a collection of submatrices as: 2533 2534 .vb 2535 A B C 2536 D E F 2537 G H I 2538 .ve 2539 2540 Where the submatrices A,B,C are owned by proc0, D,E,F are 2541 owned by proc1, G,H,I are owned by proc2. 2542 2543 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2544 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2545 The 'M','N' parameters are 8,8, and have the same values on all procs. 2546 2547 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2548 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2549 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2550 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2551 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2552 matrix, ans [DF] as another SeqAIJ matrix. 2553 2554 When d_nz, o_nz parameters are specified, d_nz storage elements are 2555 allocated for every row of the local diagonal submatrix, and o_nz 2556 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2557 One way to choose d_nz and o_nz is to use the max nonzerors per local 2558 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2559 In this case, the values of d_nz,o_nz are: 2560 .vb 2561 proc0 : dnz = 2, o_nz = 2 2562 proc1 : dnz = 3, o_nz = 2 2563 proc2 : dnz = 1, o_nz = 4 2564 .ve 2565 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2566 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2567 for proc3. i.e we are using 12+15+10=37 storage locations to store 2568 34 values. 2569 2570 When d_nnz, o_nnz parameters are specified, the storage is specified 2571 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2572 In the above case the values for d_nnz,o_nnz are: 2573 .vb 2574 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2575 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2576 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2577 .ve 2578 Here the space allocated is sum of all the above values i.e 34, and 2579 hence pre-allocation is perfect. 2580 2581 Level: intermediate 2582 2583 .keywords: matrix, aij, compressed row, sparse, parallel 2584 2585 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2586 @*/ 2587 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2588 { 2589 PetscErrorCode ierr; 2590 int size; 2591 2592 PetscFunctionBegin; 2593 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2594 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2595 if (size > 1) { 2596 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2597 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2598 } else { 2599 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2600 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2601 } 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2608 { 2609 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2610 PetscFunctionBegin; 2611 *Ad = a->A; 2612 *Ao = a->B; 2613 *colmap = a->garray; 2614 PetscFunctionReturn(0); 2615 } 2616 2617 #undef __FUNCT__ 2618 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2619 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2620 { 2621 PetscErrorCode ierr; 2622 int i; 2623 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2624 2625 PetscFunctionBegin; 2626 if (coloring->ctype == IS_COLORING_LOCAL) { 2627 ISColoringValue *allcolors,*colors; 2628 ISColoring ocoloring; 2629 2630 /* set coloring for diagonal portion */ 2631 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2632 2633 /* set coloring for off-diagonal portion */ 2634 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2635 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2636 for (i=0; i<a->B->n; i++) { 2637 colors[i] = allcolors[a->garray[i]]; 2638 } 2639 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2640 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2641 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2642 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2643 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2644 ISColoringValue *colors; 2645 int *larray; 2646 ISColoring ocoloring; 2647 2648 /* set coloring for diagonal portion */ 2649 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2650 for (i=0; i<a->A->n; i++) { 2651 larray[i] = i + a->cstart; 2652 } 2653 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2654 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2655 for (i=0; i<a->A->n; i++) { 2656 colors[i] = coloring->colors[larray[i]]; 2657 } 2658 ierr = PetscFree(larray);CHKERRQ(ierr); 2659 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2660 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2661 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2662 2663 /* set coloring for off-diagonal portion */ 2664 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2665 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2666 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2667 for (i=0; i<a->B->n; i++) { 2668 colors[i] = coloring->colors[larray[i]]; 2669 } 2670 ierr = PetscFree(larray);CHKERRQ(ierr); 2671 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2672 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2673 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2674 } else { 2675 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2676 } 2677 2678 PetscFunctionReturn(0); 2679 } 2680 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2683 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2684 { 2685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2686 PetscErrorCode ierr; 2687 2688 PetscFunctionBegin; 2689 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2690 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2696 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2697 { 2698 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2699 PetscErrorCode ierr; 2700 2701 PetscFunctionBegin; 2702 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2703 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2704 PetscFunctionReturn(0); 2705 } 2706 2707 #undef __FUNCT__ 2708 #define __FUNCT__ "MatMerge" 2709 /*@C 2710 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2711 matrices from each processor 2712 2713 Collective on MPI_Comm 2714 2715 Input Parameters: 2716 + comm - the communicators the parallel matrix will live on 2717 . inmat - the input sequential matrices 2718 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2719 2720 Output Parameter: 2721 . outmat - the parallel matrix generated 2722 2723 Level: advanced 2724 2725 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2726 2727 @*/ 2728 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat) 2729 { 2730 PetscErrorCode ierr; 2731 int m,n,i,rstart,nnz,I,*dnz,*onz; 2732 const int *indx; 2733 const PetscScalar *values; 2734 PetscMap columnmap,rowmap; 2735 2736 PetscFunctionBegin; 2737 2738 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2739 if (scall == MAT_INITIAL_MATRIX){ 2740 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2741 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2742 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2743 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2744 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2745 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2746 2747 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2748 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2749 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2750 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2751 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2752 2753 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2754 for (i=0;i<m;i++) { 2755 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2756 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2757 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2758 } 2759 /* This routine will ONLY return MPIAIJ type matrix */ 2760 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2761 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2762 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2763 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2764 2765 } else if (scall == MAT_REUSE_MATRIX){ 2766 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2767 } else { 2768 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2769 } 2770 2771 for (i=0;i<m;i++) { 2772 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2773 I = i + rstart; 2774 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2775 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2776 } 2777 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2778 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2779 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2780 2781 PetscFunctionReturn(0); 2782 } 2783 2784 #undef __FUNCT__ 2785 #define __FUNCT__ "MatFileSplit" 2786 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2787 { 2788 PetscErrorCode ierr; 2789 int rank,m,N,i,rstart,nnz; 2790 size_t len; 2791 const int *indx; 2792 PetscViewer out; 2793 char *name; 2794 Mat B; 2795 const PetscScalar *values; 2796 2797 PetscFunctionBegin; 2798 2799 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2800 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2801 /* Should this be the type of the diagonal block of A? */ 2802 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2803 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2804 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2805 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2806 for (i=0;i<m;i++) { 2807 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2808 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2809 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2810 } 2811 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2813 2814 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2815 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2816 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2817 sprintf(name,"%s.%d",outfile,rank); 2818 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2819 ierr = PetscFree(name); 2820 ierr = MatView(B,out);CHKERRQ(ierr); 2821 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2822 ierr = MatDestroy(B);CHKERRQ(ierr); 2823 PetscFunctionReturn(0); 2824 } 2825 2826 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */ 2827 PetscMap rowmap; 2828 int nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r; 2829 int *len_sra; /* array of length 2*size, len_sra[i]/len_sra[size+i] 2830 store length of i-th send/recv matrix values */ 2831 MatScalar *abuf_s; 2832 } Mat_Merge_SeqsToMPI; 2833 2834 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2837 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2838 { 2839 PetscErrorCode ierr; 2840 Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr; 2841 2842 PetscFunctionBegin; 2843 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2844 ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr); 2845 ierr = PetscFree(merge->len_sra);CHKERRQ(ierr); 2846 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2847 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2848 ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr); 2849 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2850 ierr = PetscFree(merge);CHKERRQ(ierr); 2851 2852 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2853 PetscFunctionReturn(0); 2854 } 2855 2856 #include "src/mat/utils/freespace.h" 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatMerge_SeqsToMPI" 2859 /*@C 2860 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2861 matrices from each processor 2862 2863 Collective on MPI_Comm 2864 2865 Input Parameters: 2866 + comm - the communicators the parallel matrix will live on 2867 . seqmat - the input sequential matrices 2868 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2869 2870 Output Parameter: 2871 . mpimat - the parallel matrix generated 2872 2873 Level: advanced 2874 2875 Notes: The dimensions of the sequential matrix in each processor 2876 MUST be the same. 2877 2878 @*/ 2879 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,MatReuse scall,Mat *mpimat) 2880 { 2881 PetscErrorCode ierr; 2882 Mat B_seq,B_mpi; 2883 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2884 int size,rank,M=seqmat->m,N=seqmat->n,m,i,j,*owners, 2885 *ai=a->i,*aj=a->j,tag,taga,len,len_a,*len_s,*len_sa,proc,*len_r,*len_ra, 2886 **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i, 2887 *bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj; 2888 MPI_Request *s_waits,*r_waits,*s_waitsa,*r_waitsa; 2889 MPI_Status *status; 2890 MatScalar *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i; 2891 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2892 Mat_Merge_SeqsToMPI *merge; 2893 2894 PetscFunctionBegin; 2895 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2896 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2897 if (scall == MAT_INITIAL_MATRIX){ 2898 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 2899 } else if (scall == MAT_REUSE_MATRIX){ 2900 B_mpi = *mpimat; 2901 merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr; 2902 bi = merge->bi; 2903 bj = merge->bj; 2904 ijbuf_r = merge->ijbuf_r; 2905 } else { 2906 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2907 } 2908 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2909 2910 /* determine the number of messages to send, their lengths */ 2911 /*---------------------------------------------------------*/ 2912 if (scall == MAT_INITIAL_MATRIX){ 2913 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 2914 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 2915 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 2916 ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr); 2917 ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr); 2918 } 2919 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2920 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2921 2922 len_sa = merge->len_sra; 2923 len_ra = merge->len_sra + size; 2924 2925 if (scall == MAT_INITIAL_MATRIX){ 2926 merge->nsend = 0; 2927 len = len_a = 0; 2928 for (proc=0; proc<size; proc++){ 2929 if (proc == rank) continue; 2930 len_s[proc] = len_sa[proc] = 0; 2931 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2932 len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */ 2933 } 2934 if (len_sa[proc]) { 2935 merge->nsend++; 2936 len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc]; 2937 if (len < len_s[proc]) len = len_s[proc]; 2938 if (len_a < len_sa[proc]) len_a = len_sa[proc]; 2939 } 2940 } 2941 2942 /* determine the number and length of messages to receive */ 2943 /*--------------------------------------------------------*/ 2944 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 2945 ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr); 2946 2947 /* post the Irecvs corresponding to these messages */ 2948 /*-------------------------------------------------*/ 2949 /* get new tag to keep the communication clean */ 2950 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr); 2951 ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr); 2952 2953 /* post the sends */ 2954 /*----------------*/ 2955 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2956 ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr); 2957 k = 0; 2958 for (proc=0; proc<size; proc++){ 2959 if (!len_s[proc]) continue; 2960 /* form outgoing ij data to be sent to [proc] */ 2961 nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc]; 2962 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2963 anzi = ai[i+1] - ai[i]; 2964 if (i==owners[proc]){ 2965 ijbuf_s[i-owners[proc]] = anzi; 2966 } else { 2967 ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi; 2968 } 2969 for (j=0; j <anzi; j++){ 2970 *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++; 2971 } 2972 } 2973 ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr); 2974 k++; 2975 } 2976 2977 /* receives and sends of ij-structure are complete, deallocate memories */ 2978 /*----------------------------------------------------------------------*/ 2979 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2980 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2981 2982 ierr = PetscFree(ijbuf_s);CHKERRQ(ierr); 2983 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2984 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2985 } 2986 2987 /* send and recv matrix values */ 2988 /*-----------------------------*/ 2989 if (scall == MAT_INITIAL_MATRIX){ 2990 ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr); 2991 for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */ 2992 } 2993 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2994 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr); 2995 2996 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr); 2997 k = 0; 2998 for (proc=0; proc<size; proc++){ 2999 if (!len_sa[proc]) continue; 3000 /* form outgoing mat value data to be sent to [proc] */ 3001 abuf_i = merge->abuf_s; 3002 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 3003 anzi = ai[i+1] - ai[i]; 3004 for (j=0; j <anzi; j++){ 3005 *abuf_i = *(aa+ai[i]+j); abuf_i++; 3006 } 3007 } 3008 ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr); 3009 k++; 3010 } 3011 3012 ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr); 3013 ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr); 3014 ierr = PetscFree(status);CHKERRQ(ierr); 3015 3016 ierr = PetscFree(s_waitsa);CHKERRQ(ierr); 3017 ierr = PetscFree(r_waitsa);CHKERRQ(ierr); 3018 3019 /* create seq matrix B_seq in each processor */ 3020 /*-------------------------------------------*/ 3021 if (scall == MAT_INITIAL_MATRIX){ 3022 ierr = PetscFree(len_s);CHKERRQ(ierr); 3023 /* allocate bi array and free space for accumulating nonzero column info */ 3024 ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr); 3025 bi[0] = 0; 3026 3027 ierr = PetscMalloc((N+1)*sizeof(int),&lnk);CHKERRQ(ierr); 3028 ierr = PetscLLInitialize(N,N);CHKERRQ(ierr); 3029 3030 /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */ 3031 len = 0; 3032 for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i]; 3033 ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr); 3034 current_space = free_space; 3035 3036 /* determine symbolic info for each row of B_seq */ 3037 for (i=0;i<m;i++) { 3038 bnzi = 0; 3039 /* add local non-zero cols of this proc's seqmat into lnk */ 3040 arow = owners[rank] + i; 3041 anzi = ai[arow+1] - ai[arow]; 3042 aj = a->j + ai[arow]; 3043 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr); 3044 bnzi += nlnk; 3045 /* add received col data into lnk */ 3046 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3047 /* i-th row */ 3048 if (i == 0){ 3049 anzi = *(ijbuf_r[k]+i); 3050 aj = ijbuf_r[k] + m; 3051 } else { 3052 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); 3053 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); 3054 } 3055 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr); 3056 bnzi += nlnk; 3057 } 3058 3059 /* if free space is not available, make more free space */ 3060 if (current_space->local_remaining<bnzi) { 3061 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3062 nspacedouble++; 3063 } 3064 /* copy data into free space, then initialize lnk */ 3065 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array);CHKERRQ(ierr); 3066 current_space->array += bnzi; 3067 current_space->local_used += bnzi; 3068 current_space->local_remaining -= bnzi; 3069 3070 bi[i+1] = bi[i] + bnzi; 3071 } 3072 ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr); 3073 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3074 3075 ierr = PetscFree(lnk);CHKERRQ(ierr); 3076 ierr = PetscFree(len_r);CHKERRQ(ierr); 3077 3078 /* allocate space for ba */ 3079 ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3080 ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3081 3082 /* put together B_seq */ 3083 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr); 3084 ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */ 3085 3086 /* create symbolic parallel matrix B_mpi by concatinating B_seq */ 3087 /*--------------------------------------------------------------*/ 3088 ierr = MatMerge(comm,B_seq,scall,&B_mpi);CHKERRQ(ierr); 3089 } /* endof if (scall == MAT_INITIAL_MATRIX) */ 3090 3091 /* insert mat values of B_mpi */ 3092 /*----------------------------*/ 3093 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3094 3095 /* set values of ba */ 3096 for (i=0; i<m; i++) { 3097 arow = owners[rank] + i; 3098 bj_i = bj+bi[i]; /* col indices of the i-th row of B_seq */ 3099 bnzi = bi[i+1] - bi[i]; 3100 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3101 3102 /* add local non-zero vals of this proc's seqmat into ba */ 3103 anzi = ai[arow+1] - ai[arow]; 3104 aj = a->j + ai[arow]; 3105 aa = a->a + ai[arow]; 3106 nextaj = 0; 3107 for (j=0; nextaj<anzi; j++){ 3108 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3109 ba_i[j] += aa[nextaj++]; 3110 } 3111 } 3112 3113 /* add received vals into ba */ 3114 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3115 /* i-th row */ 3116 if (i == 0){ 3117 anzi = *(ijbuf_r[k]+i); 3118 aj = ijbuf_r[k] + m; 3119 aa = abuf_r[k]; 3120 } else { 3121 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); 3122 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); 3123 aa = abuf_r[k] + *(ijbuf_r[k]+i-1); 3124 } 3125 nextaj = 0; 3126 for (j=0; nextaj<anzi; j++){ 3127 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3128 ba_i[j] += aa[nextaj++]; 3129 } 3130 } 3131 } 3132 3133 ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3134 } 3135 ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3136 ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3137 3138 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3139 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3140 3141 /* attach the supporting struct to B_mpi for reuse */ 3142 /*-------------------------------------------------*/ 3143 if (scall == MAT_INITIAL_MATRIX){ 3144 B_mpi->spptr = (void*)merge; 3145 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3146 merge->bi = bi; 3147 merge->bj = bj; 3148 merge->ijbuf_r = ijbuf_r; 3149 3150 *mpimat = B_mpi; 3151 } 3152 PetscFunctionReturn(0); 3153 } 3154