1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 int n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + 
sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 int i,j,rstart = aij->rstart,rend = aij->rend; 192 int cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 
194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 int nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 int i,j,rstart = aij->rstart,rend = aij->rend; 278 int cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) 
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 int nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 int i,j,rstart,ncols,n,flg; 351 int *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. 
*/ 379 /* 380 if nonzero structure of submatrix B cannot change then we know that 381 no processor disassembled thus we can skip this stuff 382 */ 383 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 384 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 385 if (mat->was_assembled && !other_disassembled) { 386 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 } 389 390 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 391 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 392 } 393 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 395 396 if (aij->rowvalues) { 397 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 398 aij->rowvalues = 0; 399 } 400 401 /* used by MatAXPY() */ 402 a->xtoy = 0; b->xtoy = 0; 403 a->XtoY = 0; b->XtoY = 0; 404 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 411 { 412 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 413 PetscErrorCode ierr; 414 415 PetscFunctionBegin; 416 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 417 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatZeroRows_MPIAIJ" 423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 424 { 425 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 426 PetscErrorCode ierr; 427 int i,N,*rows,*owners = l->rowners,size = l->size; 428 int *nprocs,j,idx,nsends,row; 429 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 430 int *rvalues,tag = A->tag,count,base,slen,n,*source; 431 int *lens,imdex,*lrows,*values,rstart=l->rstart; 432 MPI_Comm comm = A->comm; 433 MPI_Request *send_waits,*recv_waits; 434 MPI_Status recv_status,*send_status; 435 IS istmp; 436 PetscTruth found; 437 438 PetscFunctionBegin; 439 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 441 442 /* first count number of contributors to each processor */ 443 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 444 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 445 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 446 for (i=0; i<N; i++) { 447 idx = rows[i]; 448 found = PETSC_FALSE; 449 for (j=0; j<size; j++) { 450 if (idx >= owners[j] && idx < owners[j+1]) { 451 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 452 } 453 } 454 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 455 } 456 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 457 458 /* inform other processors of number of messages and max length*/ 459 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 460 461 /* post receives: */ 462 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 463 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 464 for (i=0; i<nrecvs; i++) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 474 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 475 starts[0] = 0; 476 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 477 
for (i=0; i<N; i++) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 481 482 starts[0] = 0; 483 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 484 count = 0; 485 for (i=0; i<size; i++) { 486 if (nprocs[2*i+1]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 511 count = 0; 512 for (i=0; i<nrecvs; i++) { 513 values = rvalues + i*nmax; 514 for (j=0; j<lens[i]; j++) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PetscLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 
532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for (i = 0; i < slen; i++) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatMult_MPIAIJ" 571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 574 PetscErrorCode ierr; 575 int nt; 576 577 PetscFunctionBegin; 578 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 579 if (nt != A->n) { 580 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 581 } 582 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 584 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMultAdd_MPIAIJ" 591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 PetscErrorCode ierr; 595 596 PetscFunctionBegin; 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 PetscErrorCode ierr; 610 611 PetscFunctionBegin; 612 /* do nondiagonal part */ 613 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 614 /* send it on its way */ 615 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 /* do local part */ 617 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 618 /* receive remote parts: note this assumes the values are not actually */ 619 /* inserted in yy until the next line, which is true for my implementation*/ 620 /* but is not perhaps always true. 
*/ 621 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 629 { 630 MPI_Comm comm; 631 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 632 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 633 IS Me,Notme; 634 PetscErrorCode ierr; 635 int M,N,first,last,*notme,ntids,i; 636 637 PetscFunctionBegin; 638 639 /* Easy test: symmetric diagonal block */ 640 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 641 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 642 if (!*f) PetscFunctionReturn(0); 643 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 644 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 645 if (ntids==1) PetscFunctionReturn(0); 646 647 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 648 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 649 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 650 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 651 for (i=0; i<first; i++) notme[i] = i; 652 for (i=last; i<M; i++) notme[i-last+first] = i; 653 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 654 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 655 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 656 Aoff = Aoffs[0]; 657 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 658 Boff = Boffs[0]; 659 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 660 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 661 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 662 ierr = ISDestroy(Me);CHKERRQ(ierr); 663 ierr = ISDestroy(Notme);CHKERRQ(ierr); 664 665 PetscFunctionReturn(0); 666 } 667 EXTERN_C_END 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 672 { 673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 674 PetscErrorCode ierr; 675 676 PetscFunctionBegin; 677 /* do nondiagonal part */ 678 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 679 /* send it on its way */ 680 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 681 /* do local part */ 682 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 683 /* receive remote parts: note this assumes the values are not actually */ 684 /* inserted in yy until the next line, which is true for my implementation*/ 685 /* but is not perhaps always true. 
*/ 686 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 /* 691 This only works correctly for square matrices where the subblock A->A is the 692 diagonal block 693 */ 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 697 { 698 PetscErrorCode ierr; 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 701 PetscFunctionBegin; 702 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 703 if (a->rstart != a->cstart || a->rend != a->cend) { 704 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 705 } 706 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatScale_MPIAIJ" 712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 719 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatDestroy_MPIAIJ" 725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 726 { 727 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 728 PetscErrorCode ierr; 729 730 PetscFunctionBegin; 731 #if defined(PETSC_USE_LOG) 732 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 733 #endif 734 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 735 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 736 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 737 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 738 #if defined (PETSC_USE_CTABLE) 739 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 740 #else 741 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 742 #endif 743 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 744 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 745 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 746 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 747 ierr = PetscFree(aij);CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatView_MPIAIJ_Binary" 753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 754 { 755 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 756 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 757 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 758 PetscErrorCode ierr; 759 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 760 int nz,fd,header[4],*row_lengths,*range,rlen,i; 761 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 762 PetscScalar *column_values; 763 764 PetscFunctionBegin; 765 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 766 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 767 nz = A->nz + B->nz; 768 if (!rank) { 769 header[0] = MAT_FILE_COOKIE; 770 header[1] = mat->M; 771 header[2] = mat->N; 772 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 774 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 775 /* get largest number of rows any processor has */ 776 rlen = mat->m; 777 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 778 for (i=1; i<size; i++) { 779 rlen = PetscMax(rlen,range[i+1] - range[i]); 780 } 781 } else { 782 ierr = 
MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 783 rlen = mat->m; 784 } 785 786 /* load up the local row counts */ 787 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 788 for (i=0; i<mat->m; i++) { 789 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 790 } 791 792 /* store the row lengths to the file */ 793 if (!rank) { 794 MPI_Status status; 795 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 796 for (i=1; i<size; i++) { 797 rlen = range[i+1] - range[i]; 798 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 799 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 800 } 801 } else { 802 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 803 } 804 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 805 806 /* load up the local column indices */ 807 nzmax = nz; /* )th processor needs space a largest processor needs */ 808 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 809 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 810 cnt = 0; 811 for (i=0; i<mat->m; i++) { 812 for (j=B->i[i]; j<B->i[i+1]; j++) { 813 if ( (col = garray[B->j[j]]) > cstart) break; 814 column_indices[cnt++] = col; 815 } 816 for (k=A->i[i]; k<A->i[i+1]; k++) { 817 column_indices[cnt++] = A->j[k] + cstart; 818 } 819 for (; j<B->i[i+1]; j++) { 820 column_indices[cnt++] = garray[B->j[j]]; 821 } 822 } 823 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 824 825 /* store the column indices to the file */ 826 if (!rank) { 827 MPI_Status status; 828 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 829 for (i=1; i<size; i++) { 830 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 831 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 832 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 833 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 834 } 835 } else { 836 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 837 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 838 } 839 ierr = PetscFree(column_indices);CHKERRQ(ierr); 840 841 /* load up the local column values */ 842 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 843 cnt = 0; 844 for (i=0; i<mat->m; i++) { 845 for (j=B->i[i]; j<B->i[i+1]; j++) { 846 if ( garray[B->j[j]] > cstart) break; 847 column_values[cnt++] = B->a[j]; 848 } 849 for (k=A->i[i]; k<A->i[i+1]; k++) { 850 column_values[cnt++] = A->a[k]; 851 } 852 for (; j<B->i[i+1]; j++) { 853 column_values[cnt++] = B->a[j]; 854 } 855 } 856 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 857 858 /* store the column values to the file */ 859 if (!rank) { 860 MPI_Status status; 861 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 862 for (i=1; i<size; i++) { 863 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 864 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 865 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 866 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 867 } 868 } else { 869 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 870 ierr = 
MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 871 } 872 ierr = PetscFree(column_values);CHKERRQ(ierr); 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 878 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 879 { 880 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 881 PetscErrorCode ierr; 882 PetscMPIInt rank = aij->rank,size = aij->size; 883 PetscTruth isdraw,iascii,flg,isbinary; 884 PetscViewer sviewer; 885 PetscViewerFormat format; 886 887 PetscFunctionBegin; 888 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 889 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 890 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 891 if (iascii) { 892 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 893 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 894 MatInfo info; 895 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 896 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 898 if (flg) { 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 900 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 901 } else { 902 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 903 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 904 } 905 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 906 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 907 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 908 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 909 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 910 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } else if (format == PETSC_VIEWER_ASCII_INFO) { 913 PetscFunctionReturn(0); 914 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 915 PetscFunctionReturn(0); 916 } 917 } else if (isbinary) { 918 if (size == 1) { 919 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 920 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 921 } else { 922 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } else if (isdraw) { 926 PetscDraw draw; 927 PetscTruth isnull; 928 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 929 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 930 } 931 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 /* assemble the entire matrix onto first processor. 
*/ 937 Mat A; 938 Mat_SeqAIJ *Aloc; 939 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 940 PetscScalar *a; 941 942 if (!rank) { 943 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 944 } else { 945 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 946 } 947 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 948 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 949 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 950 PetscLogObjectParent(mat,A); 951 952 /* copy over the A part */ 953 Aloc = (Mat_SeqAIJ*)aij->A->data; 954 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 955 row = aij->rstart; 956 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 957 for (i=0; i<m; i++) { 958 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 959 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 960 } 961 aj = Aloc->j; 962 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 963 964 /* copy over the B part */ 965 Aloc = (Mat_SeqAIJ*)aij->B->data; 966 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 967 row = aij->rstart; 968 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 969 ct = cols; 970 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 974 } 975 ierr = PetscFree(ct);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 /* 979 Everyone has to call to draw the matrix since the graphics waits are 980 synchronized across all processors that share the PetscDraw object 981 */ 982 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 983 if (!rank) { 984 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 985 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 986 } 987 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 988 ierr = MatDestroy(A);CHKERRQ(ierr); 989 } 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatView_MPIAIJ" 995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 996 { 997 PetscErrorCode ierr; 998 PetscTruth iascii,isdraw,issocket,isbinary; 999 1000 PetscFunctionBegin; 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1005 if (iascii || isdraw || isbinary || issocket) { 1006 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1007 } else { 1008 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatRelax_MPIAIJ" 1017 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1018 { 1019 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1020 PetscErrorCode ierr; 1021 Vec bb1; 1022 PetscScalar mone=-1.0; 1023 1024 PetscFunctionBegin; 1025 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both 
positive",its,lits); 1026 1027 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1028 1029 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1030 if (flag & SOR_ZERO_INITIAL_GUESS) { 1031 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1032 its--; 1033 } 1034 1035 while (its--) { 1036 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1038 1039 /* update rhs: bb1 = bb - B*x */ 1040 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1041 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1042 1043 /* local sweep */ 1044 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1045 CHKERRQ(ierr); 1046 } 1047 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1048 if (flag & SOR_ZERO_INITIAL_GUESS) { 1049 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1050 its--; 1051 } 1052 while (its--) { 1053 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1055 1056 /* update rhs: bb1 = bb - B*x */ 1057 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1058 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1059 1060 /* local sweep */ 1061 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1062 CHKERRQ(ierr); 1063 } 1064 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1065 if (flag & SOR_ZERO_INITIAL_GUESS) { 1066 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1067 its--; 1068 } 1069 while (its--) { 1070 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1072 1073 /* update rhs: bb1 = bb - B*x */ 1074 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1075 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1076 1077 /* local sweep */ 1078 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1079 CHKERRQ(ierr); 1080 } 1081 } else { 1082 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1083 } 1084 1085 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1091 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1092 { 1093 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1094 Mat A = mat->A,B = mat->B; 1095 PetscErrorCode ierr; 1096 PetscReal isend[5],irecv[5]; 1097 1098 PetscFunctionBegin; 1099 info->block_size = 1.0; 1100 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1101 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1102 isend[3] = info->memory; isend[4] = info->mallocs; 1103 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1104 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1105 isend[3] += info->memory; isend[4] += info->mallocs; 1106 if (flag == MAT_LOCAL) { 1107 info->nz_used = isend[0]; 1108 info->nz_allocated = isend[1]; 1109 info->nz_unneeded = isend[2]; 1110 info->memory = isend[3]; 1111 info->mallocs = isend[4]; 1112 } else if (flag == MAT_GLOBAL_MAX) { 1113 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1114 info->nz_used = irecv[0]; 1115 info->nz_allocated = irecv[1]; 1116 info->nz_unneeded = irecv[2]; 1117 info->memory = irecv[3]; 1118 info->mallocs = irecv[4]; 1119 } else if (flag == MAT_GLOBAL_SUM) { 1120 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1121 info->nz_used = irecv[0]; 1122 info->nz_allocated = irecv[1]; 1123 info->nz_unneeded = irecv[2]; 1124 info->memory = irecv[3]; 1125 info->mallocs = irecv[4]; 1126 } 1127 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1128 info->fill_ratio_needed = 0; 1129 info->factor_mallocs = 0; 1130 info->rows_global = (double)matin->M; 1131 info->columns_global = (double)matin->N; 1132 info->rows_local = (double)matin->m; 1133 info->columns_local = (double)matin->N; 1134 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatSetOption_MPIAIJ" 1140 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1141 { 1142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1143 PetscErrorCode ierr; 1144 1145 PetscFunctionBegin; 1146 switch (op) { 1147 case MAT_NO_NEW_NONZERO_LOCATIONS: 1148 case MAT_YES_NEW_NONZERO_LOCATIONS: 1149 case MAT_COLUMNS_UNSORTED: 1150 case MAT_COLUMNS_SORTED: 1151 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1152 case MAT_KEEP_ZEROED_ROWS: 1153 case MAT_NEW_NONZERO_LOCATION_ERR: 1154 case MAT_USE_INODES: 1155 case MAT_DO_NOT_USE_INODES: 1156 case MAT_IGNORE_ZERO_ENTRIES: 1157 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1158 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1159 break; 1160 case MAT_ROW_ORIENTED: 1161 a->roworiented = PETSC_TRUE; 1162 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1163 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1164 break; 1165 case MAT_ROWS_SORTED: 1166 case MAT_ROWS_UNSORTED: 1167 case MAT_YES_NEW_DIAGONALS: 1168 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1169 break; 1170 case MAT_COLUMN_ORIENTED: 1171 a->roworiented = PETSC_FALSE; 1172 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1173 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1174 break; 1175 case MAT_IGNORE_OFF_PROC_ENTRIES: 1176 a->donotstash = PETSC_TRUE; 1177 break; 1178 case MAT_NO_NEW_DIAGONALS: 1179 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1180 case MAT_SYMMETRIC: 1181 case MAT_STRUCTURALLY_SYMMETRIC: 1182 case MAT_HERMITIAN: 1183 case MAT_SYMMETRY_ETERNAL: 1184 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1185 break; 1186 case MAT_NOT_SYMMETRIC: 1187 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1188 case MAT_NOT_HERMITIAN: 1189 case MAT_NOT_SYMMETRY_ETERNAL: 1190 break; 1191 default: 1192 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1193 } 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatGetRow_MPIAIJ" 1199 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1200 { 1201 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1202 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1203 PetscErrorCode ierr; 1204 int i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1205 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1206 int *cmap,*idx_p; 1207 1208 PetscFunctionBegin; 1209 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1210 mat->getrowactive = PETSC_TRUE; 1211 1212 if (!mat->rowvalues && (idx || v)) { 1213 /* 1214 allocate enough space to hold information from the longest row. 
1215 */ 1216 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1217 int max = 1,tmp; 1218 for (i=0; i<matin->m; i++) { 1219 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1220 if (max < tmp) { max = tmp; } 1221 } 1222 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1223 mat->rowindices = (int*)(mat->rowvalues + max); 1224 } 1225 1226 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1227 lrow = row - rstart; 1228 1229 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1230 if (!v) {pvA = 0; pvB = 0;} 1231 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1232 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1233 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1234 nztot = nzA + nzB; 1235 1236 cmap = mat->garray; 1237 if (v || idx) { 1238 if (nztot) { 1239 /* Sort by increasing column numbers, assuming A and B already sorted */ 1240 int imark = -1; 1241 if (v) { 1242 *v = v_p = mat->rowvalues; 1243 for (i=0; i<nzB; i++) { 1244 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1245 else break; 1246 } 1247 imark = i; 1248 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1249 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1250 } 1251 if (idx) { 1252 *idx = idx_p = mat->rowindices; 1253 if (imark > -1) { 1254 for (i=0; i<imark; i++) { 1255 idx_p[i] = cmap[cworkB[i]]; 1256 } 1257 } else { 1258 for (i=0; i<nzB; i++) { 1259 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1260 else break; 1261 } 1262 imark = i; 1263 } 1264 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1265 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1266 } 1267 } else { 1268 if (idx) *idx = 0; 1269 if (v) *v = 0; 1270 } 1271 } 1272 *nz = nztot; 1273 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1274 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1280 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1281 { 1282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1283 1284 PetscFunctionBegin; 1285 if (aij->getrowactive == PETSC_FALSE) { 1286 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1287 } 1288 aij->getrowactive = PETSC_FALSE; 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatNorm_MPIAIJ" 1294 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1295 { 1296 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1297 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1298 PetscErrorCode ierr; 1299 int i,j,cstart = aij->cstart; 1300 PetscReal sum = 0.0; 1301 PetscScalar *v; 1302 1303 PetscFunctionBegin; 1304 if (aij->size == 1) { 1305 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1306 } else { 1307 if (type == NORM_FROBENIUS) { 1308 v = amat->a; 1309 for (i=0; i<amat->nz; i++) { 1310 #if defined(PETSC_USE_COMPLEX) 1311 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1312 #else 1313 sum += (*v)*(*v); v++; 1314 #endif 1315 } 1316 v = bmat->a; 1317 for (i=0; i<bmat->nz; i++) { 1318 #if defined(PETSC_USE_COMPLEX) 1319 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1320 #else 1321 sum += (*v)*(*v); v++; 1322 #endif 1323 } 1324 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1325 *norm = sqrt(*norm); 1326 } else if (type == NORM_1) { /* max column 
norm */ 1327 PetscReal *tmp,*tmp2; 1328 int *jj,*garray = aij->garray; 1329 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1330 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1331 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1332 *norm = 0.0; 1333 v = amat->a; jj = amat->j; 1334 for (j=0; j<amat->nz; j++) { 1335 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1336 } 1337 v = bmat->a; jj = bmat->j; 1338 for (j=0; j<bmat->nz; j++) { 1339 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1340 } 1341 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1342 for (j=0; j<mat->N; j++) { 1343 if (tmp2[j] > *norm) *norm = tmp2[j]; 1344 } 1345 ierr = PetscFree(tmp);CHKERRQ(ierr); 1346 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1347 } else if (type == NORM_INFINITY) { /* max row norm */ 1348 PetscReal ntemp = 0.0; 1349 for (j=0; j<aij->A->m; j++) { 1350 v = amat->a + amat->i[j]; 1351 sum = 0.0; 1352 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1353 sum += PetscAbsScalar(*v); v++; 1354 } 1355 v = bmat->a + bmat->i[j]; 1356 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1357 sum += PetscAbsScalar(*v); v++; 1358 } 1359 if (sum > ntemp) ntemp = sum; 1360 } 1361 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1362 } else { 1363 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1364 } 1365 } 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatTranspose_MPIAIJ" 1371 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1372 { 1373 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1374 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1375 PetscErrorCode ierr; 1376 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1377 Mat B; 1378 PetscScalar *array; 1379 1380 PetscFunctionBegin; 1381 if (!matout && M != N) { 1382 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1383 } 1384 1385 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1386 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1387 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1388 1389 /* copy over the A part */ 1390 Aloc = (Mat_SeqAIJ*)a->A->data; 1391 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1392 row = a->rstart; 1393 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1394 for (i=0; i<m; i++) { 1395 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1396 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1397 } 1398 aj = Aloc->j; 1399 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1400 1401 /* copy over the B part */ 1402 Aloc = (Mat_SeqAIJ*)a->B->data; 1403 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1404 row = a->rstart; 1405 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1406 ct = cols; 1407 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1408 for (i=0; i<m; i++) { 1409 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1410 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1411 } 1412 ierr = PetscFree(ct);CHKERRQ(ierr); 1413 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1414 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1415 if (matout) { 1416 *matout = B; 1417 } else { 1418 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1419 } 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1425 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1426 { 1427 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 1428 Mat a = aij->A,b = aij->B; 1429 PetscErrorCode ierr; 1430 int s1,s2,s3; 1431 1432 PetscFunctionBegin; 1433 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1434 if (rr) { 1435 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1436 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1437 /* Overlap communication with computation. */ 1438 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1439 } 1440 if (ll) { 1441 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1442 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1443 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1444 } 1445 /* scale the diagonal block */ 1446 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1447 1448 if (rr) { 1449 /* Do a scatter end and then right scale the off-diagonal block */ 1450 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1451 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1452 } 1453 1454 PetscFunctionReturn(0); 1455 } 1456 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1460 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1461 { 1462 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1463 PetscErrorCode ierr; 1464 1465 PetscFunctionBegin; 1466 if (!a->rank) { 1467 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1474 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1475 { 1476 PetscFunctionBegin; 1477 *bs = 1; 1478 PetscFunctionReturn(0); 1479 } 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1482 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1483 { 1484 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1485 PetscErrorCode ierr; 1486 1487 PetscFunctionBegin; 1488 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1489 PetscFunctionReturn(0); 1490 } 1491 1492 #undef __FUNCT__ 1493 #define __FUNCT__ "MatEqual_MPIAIJ" 1494 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1495 { 1496 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1497 Mat a,b,c,d; 1498 PetscTruth flg; 1499 PetscErrorCode ierr; 1500 1501 PetscFunctionBegin; 1502 a = matA->A; b = matA->B; 1503 c = matB->A; d = matB->B; 1504 1505 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1506 if (flg == PETSC_TRUE) { 1507 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1508 } 1509 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatCopy_MPIAIJ" 1515 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1516 { 1517 PetscErrorCode ierr; 1518 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1519 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1520 1521 PetscFunctionBegin; 1522 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1523 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1524 /* because of the column compression in the off-processor part of the matrix a->B, 1525 the number of columns in a->B and b->B may be different, hence we cannot call 1526 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1527 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1528 then copying the submatrices */ 1529 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1532 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1539 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1540 { 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #include "petscblaslapack.h" 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatAXPY_MPIAIJ" 1551 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1552 { 1553 PetscErrorCode ierr; 1554 int i; 1555 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1556 PetscBLASInt bnz,one=1; 1557 Mat_SeqAIJ *x,*y; 1558 1559 PetscFunctionBegin; 1560 if (str == SAME_NONZERO_PATTERN) { 1561 x = (Mat_SeqAIJ *)xx->A->data; 1562 y = (Mat_SeqAIJ *)yy->A->data; 1563 bnz = (PetscBLASInt)x->nz; 1564 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1565 x = (Mat_SeqAIJ *)xx->B->data; 1566 y = (Mat_SeqAIJ *)yy->B->data; 1567 bnz = (PetscBLASInt)x->nz; 1568 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1569 } else if (str == SUBSET_NONZERO_PATTERN) { 1570 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1571 1572 x = (Mat_SeqAIJ *)xx->B->data; 1573 y = (Mat_SeqAIJ *)yy->B->data; 1574 if (y->xtoy && y->XtoY != xx->B) { 1575 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1576 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1577 } 1578 if (!y->xtoy) { /* get xtoy */ 1579 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1580 y->XtoY = xx->B; 1581 } 1582 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1583 } else { 1584 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1585 } 1586 PetscFunctionReturn(0); 1587 } 1588 1589 /* -------------------------------------------------------------------*/ 1590 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1591 MatGetRow_MPIAIJ, 1592 MatRestoreRow_MPIAIJ, 1593 MatMult_MPIAIJ, 1594 /* 4*/ MatMultAdd_MPIAIJ, 1595 MatMultTranspose_MPIAIJ, 1596 MatMultTransposeAdd_MPIAIJ, 1597 0, 1598 0, 1599 0, 1600 /*10*/ 0, 1601 0, 1602 0, 1603 MatRelax_MPIAIJ, 1604 MatTranspose_MPIAIJ, 1605 /*15*/ MatGetInfo_MPIAIJ, 1606 MatEqual_MPIAIJ, 1607 MatGetDiagonal_MPIAIJ, 1608 MatDiagonalScale_MPIAIJ, 1609 MatNorm_MPIAIJ, 1610 /*20*/ MatAssemblyBegin_MPIAIJ, 1611 MatAssemblyEnd_MPIAIJ, 1612 0, 1613 MatSetOption_MPIAIJ, 1614 MatZeroEntries_MPIAIJ, 1615 /*25*/ MatZeroRows_MPIAIJ, 1616 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1617 MatLUFactorSymbolic_MPIAIJ_TFS, 1618 #else 1619 0, 1620 #endif 1621 0, 1622 0, 1623 0, 1624 /*30*/ MatSetUpPreallocation_MPIAIJ, 1625 0, 1626 0, 1627 0, 1628 0, 1629 /*35*/ MatDuplicate_MPIAIJ, 1630 0, 1631 0, 1632 0, 1633 0, 1634 /*40*/ MatAXPY_MPIAIJ, 1635 MatGetSubMatrices_MPIAIJ, 1636 MatIncreaseOverlap_MPIAIJ, 1637 MatGetValues_MPIAIJ, 1638 MatCopy_MPIAIJ, 1639 /*45*/ MatPrintHelp_MPIAIJ, 1640 MatScale_MPIAIJ, 1641 0, 1642 0, 1643 0, 1644 /*50*/ MatGetBlockSize_MPIAIJ, 1645 0, 1646 0, 1647 0, 1648 0, 1649 /*55*/ MatFDColoringCreate_MPIAIJ, 1650 0, 1651 MatSetUnfactored_MPIAIJ, 1652 0, 1653 0, 1654 /*60*/ MatGetSubMatrix_MPIAIJ, 1655 
MatDestroy_MPIAIJ, 1656 MatView_MPIAIJ, 1657 MatGetPetscMaps_Petsc, 1658 0, 1659 /*65*/ 0, 1660 0, 1661 0, 1662 0, 1663 0, 1664 /*70*/ 0, 1665 0, 1666 MatSetColoring_MPIAIJ, 1667 MatSetValuesAdic_MPIAIJ, 1668 MatSetValuesAdifor_MPIAIJ, 1669 /*75*/ 0, 1670 0, 1671 0, 1672 0, 1673 0, 1674 /*80*/ 0, 1675 0, 1676 0, 1677 0, 1678 /*84*/ MatLoad_MPIAIJ, 1679 0, 1680 0, 1681 0, 1682 0, 1683 /*89*/ 0, 1684 MatMatMult_MPIAIJ_MPIAIJ, 1685 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1686 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1687 MatPtAP_MPIAIJ_MPIAIJ, 1688 /*94*/ MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1689 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1690 }; 1691 1692 /* ----------------------------------------------------------------------------------------*/ 1693 1694 EXTERN_C_BEGIN 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1697 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1698 { 1699 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1704 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 EXTERN_C_END 1708 1709 EXTERN_C_BEGIN 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1712 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1713 { 1714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1719 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 EXTERN_C_END 1723 1724 #include "petscpc.h" 1725 EXTERN_C_BEGIN 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1728 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1729 { 1730 Mat_MPIAIJ *b; 1731 PetscErrorCode ierr; 1732 int i; 1733 1734 PetscFunctionBegin; 1735 B->preallocated = PETSC_TRUE; 1736 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1737 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1738 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1739 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1740 if (d_nnz) { 1741 for (i=0; i<B->m; i++) { 1742 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1743 } 1744 } 1745 if (o_nnz) { 1746 for (i=0; i<B->m; i++) { 1747 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1748 } 1749 } 1750 b = (Mat_MPIAIJ*)B->data; 1751 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1752 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1753 1754 PetscFunctionReturn(0); 1755 } 1756 EXTERN_C_END 1757 1758 /*MC 1759 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1760 1761 Options Database Keys: 1762 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1763 1764 Level: beginner 1765 1766 .seealso: MatCreateMPIAIJ 1767 M*/ 1768 1769 EXTERN_C_BEGIN 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatCreate_MPIAIJ" 1772 PetscErrorCode MatCreate_MPIAIJ(Mat B) 1773 { 1774 Mat_MPIAIJ *b; 1775 PetscErrorCode ierr; 1776 int i,size; 1777 1778 PetscFunctionBegin; 1779 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1780 1781 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1782 B->data = (void*)b; 1783 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1784 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1785 B->factor = 0; 1786 B->assembled = PETSC_FALSE; 1787 B->mapping = 0; 1788 1789 B->insertmode = NOT_SET_VALUES; 1790 b->size = size; 1791 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1792 1793 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1794 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1795 1796 /* the information in the maps duplicates the information computed below, eventually 1797 we should remove the duplicate information that is not contained in the maps */ 1798 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1799 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1800 1801 /* build local table of row and column ownerships */ 1802 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1803 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1804 b->cowners = b->rowners + b->size + 2; 1805 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1806 b->rowners[0] = 0; 1807 for (i=2; i<=b->size; i++) { 1808 b->rowners[i] += b->rowners[i-1]; 1809 } 1810 b->rstart = b->rowners[b->rank]; 1811 b->rend = b->rowners[b->rank+1]; 1812 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1813 b->cowners[0] = 0; 1814 for (i=2; i<=b->size; i++) { 1815 b->cowners[i] += b->cowners[i-1]; 1816 } 1817 b->cstart = b->cowners[b->rank]; 1818 b->cend = b->cowners[b->rank+1]; 1819 1820 /* build cache for off array entries formed */ 1821 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1822 b->donotstash = PETSC_FALSE; 1823 b->colmap = 0; 1824 b->garray = 0; 1825 b->roworiented = PETSC_TRUE; 1826 1827 /* stuff used for matrix vector multiply */ 1828 b->lvec = PETSC_NULL; 1829 b->Mvctx = PETSC_NULL; 1830 1831 /* stuff for MatGetRow() */ 1832 b->rowindices = 0; 1833 b->rowvalues = 0; 1834 b->getrowactive = PETSC_FALSE; 1835 1836 /* Explicitly create 2 MATSEQAIJ matrices. 
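     b->A holds the 'diagonal' block (the local m x n part whose columns this process owns) and
     b->B holds the off-diagonal block; b->B is created below with the full global column count N,
     and its columns are later compressed to just the ones actually referenced (which is why the
     garray/colmap bookkeeping exists elsewhere in this file).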
*/ 1837 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1838 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1839 PetscLogObjectParent(B,b->A); 1840 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1841 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1842 PetscLogObjectParent(B,b->B); 1843 1844 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1845 "MatStoreValues_MPIAIJ", 1846 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1847 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1848 "MatRetrieveValues_MPIAIJ", 1849 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1851 "MatGetDiagonalBlock_MPIAIJ", 1852 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1854 "MatIsTranspose_MPIAIJ", 1855 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1857 "MatMPIAIJSetPreallocation_MPIAIJ", 1858 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1859 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1860 "MatDiagonalScaleLocal_MPIAIJ", 1861 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1862 PetscFunctionReturn(0); 1863 } 1864 EXTERN_C_END 1865 1866 /*MC 1867 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1868 1869 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1870 and MATMPIAIJ otherwise. As a result, for single process communicators, 1871 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1872 for communicators controlling multiple processes. It is recommended that you call both of 1873 the above preallocation routines for simplicity. 1874 1875 Options Database Keys: 1876 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1877 1878 Level: beginner 1879 1880 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1881 M*/ 1882 1883 EXTERN_C_BEGIN 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatCreate_AIJ" 1886 PetscErrorCode MatCreate_AIJ(Mat A) 1887 { 1888 PetscErrorCode ierr; 1889 int size; 1890 1891 PetscFunctionBegin; 1892 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1893 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1894 if (size == 1) { 1895 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1896 } else { 1897 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 EXTERN_C_END 1902 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1905 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1906 { 1907 Mat mat; 1908 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1909 PetscErrorCode ierr; 1910 1911 PetscFunctionBegin; 1912 *newmat = 0; 1913 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1914 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1915 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1916 a = (Mat_MPIAIJ*)mat->data; 1917 1918 mat->factor = matin->factor; 1919 mat->assembled = PETSC_TRUE; 1920 mat->insertmode = NOT_SET_VALUES; 1921 mat->preallocated = PETSC_TRUE; 1922 1923 a->rstart = oldmat->rstart; 1924 a->rend = oldmat->rend; 1925 a->cstart = oldmat->cstart; 1926 a->cend = oldmat->cend; 1927 a->size = oldmat->size; 1928 a->rank = oldmat->rank; 1929 a->donotstash = oldmat->donotstash; 1930 a->roworiented = oldmat->roworiented; 1931 a->rowindices = 0; 1932 a->rowvalues = 0; 1933 a->getrowactive = PETSC_FALSE; 1934 1935 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1936 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1937 if (oldmat->colmap) { 1938 #if defined (PETSC_USE_CTABLE) 1939 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1940 #else 1941 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1942 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1943 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1944 #endif 1945 } else a->colmap = 0; 1946 if (oldmat->garray) { 1947 int len; 1948 len = oldmat->B->n; 1949 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1950 PetscLogObjectMemory(mat,len*sizeof(int)); 1951 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1952 } else a->garray = 0; 1953 1954 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1955 PetscLogObjectParent(mat,a->lvec); 1956 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1957 PetscLogObjectParent(mat,a->Mvctx); 1958 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1959 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1960 PetscLogObjectParent(mat,a->A); 1961 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1962 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1963 PetscLogObjectParent(mat,a->B); 1964 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1965 *newmat = mat; 1966 PetscFunctionReturn(0); 1967 } 1968 1969 #include "petscsys.h" 1970 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatLoad_MPIAIJ" 1973 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1974 { 1975 Mat A; 1976 PetscScalar *vals,*svals; 1977 
MPI_Comm comm = ((PetscObject)viewer)->comm; 1978 MPI_Status status; 1979 PetscErrorCode ierr; 1980 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1981 int i,nz,j,rstart,rend,fd; 1982 int header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1983 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1984 int cend,cstart,n; 1985 1986 PetscFunctionBegin; 1987 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1988 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1989 if (!rank) { 1990 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1991 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1992 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1993 if (header[3] < 0) { 1994 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1995 } 1996 } 1997 1998 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1999 M = header[1]; N = header[2]; 2000 /* determine ownership of all rows */ 2001 m = M/size + ((M % size) > rank); 2002 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2003 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2004 rowners[0] = 0; 2005 for (i=2; i<=size; i++) { 2006 rowners[i] += rowners[i-1]; 2007 } 2008 rstart = rowners[rank]; 2009 rend = rowners[rank+1]; 2010 2011 /* distribute row lengths to all processors */ 2012 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 2013 offlens = ourlens + (rend-rstart); 2014 if (!rank) { 2015 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 2016 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2017 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2018 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2019 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2020 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2021 } else { 2022 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2023 } 2024 2025 if (!rank) { 2026 /* calculate the number of nonzeros on each processor */ 2027 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2028 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2029 for (i=0; i<size; i++) { 2030 for (j=rowners[i]; j< rowners[i+1]; j++) { 2031 procsnz[i] += rowlengths[j]; 2032 } 2033 } 2034 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2035 2036 /* determine max buffer needed and allocate it */ 2037 maxnz = 0; 2038 for (i=0; i<size; i++) { 2039 maxnz = PetscMax(maxnz,procsnz[i]); 2040 } 2041 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2042 2043 /* read in my part of the matrix column indices */ 2044 nz = procsnz[0]; 2045 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2046 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2047 2048 /* read in every one elses and ship off */ 2049 for (i=1; i<size; i++) { 2050 nz = procsnz[i]; 2051 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2052 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2053 } 2054 ierr = PetscFree(cols);CHKERRQ(ierr); 2055 } else { 2056 /* determine buffer space needed for message */ 2057 nz = 0; 2058 for (i=0; i<m; i++) { 2059 nz += ourlens[i]; 2060 } 2061 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2062 2063 /* receive message of column indices*/ 2064 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2065 ierr = 
MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2066 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2067 } 2068 2069 /* determine column ownership if matrix is not square */ 2070 if (N != M) { 2071 n = N/size + ((N % size) > rank); 2072 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2073 cstart = cend - n; 2074 } else { 2075 cstart = rstart; 2076 cend = rend; 2077 n = cend - cstart; 2078 } 2079 2080 /* loop over local rows, determining number of off diagonal entries */ 2081 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2082 jj = 0; 2083 for (i=0; i<m; i++) { 2084 for (j=0; j<ourlens[i]; j++) { 2085 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2086 jj++; 2087 } 2088 } 2089 2090 /* create our matrix */ 2091 for (i=0; i<m; i++) { 2092 ourlens[i] -= offlens[i]; 2093 } 2094 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2095 ierr = MatSetType(A,type);CHKERRQ(ierr); 2096 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2097 2098 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2099 for (i=0; i<m; i++) { 2100 ourlens[i] += offlens[i]; 2101 } 2102 2103 if (!rank) { 2104 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2105 2106 /* read in my part of the matrix numerical values */ 2107 nz = procsnz[0]; 2108 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2109 2110 /* insert into matrix */ 2111 jj = rstart; 2112 smycols = mycols; 2113 svals = vals; 2114 for (i=0; i<m; i++) { 2115 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2116 smycols += ourlens[i]; 2117 svals += ourlens[i]; 2118 jj++; 2119 } 2120 2121 /* read in other processors and ship out */ 2122 for (i=1; i<size; i++) { 2123 nz = procsnz[i]; 2124 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2125 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2126 } 2127 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2128 } else { 2129 /* receive numeric values */ 2130 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2131 2132 /* receive message of values*/ 2133 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2134 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2135 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2136 2137 /* insert into matrix */ 2138 jj = rstart; 2139 smycols = mycols; 2140 svals = vals; 2141 for (i=0; i<m; i++) { 2142 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2143 smycols += ourlens[i]; 2144 svals += ourlens[i]; 2145 jj++; 2146 } 2147 } 2148 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2149 ierr = PetscFree(vals);CHKERRQ(ierr); 2150 ierr = PetscFree(mycols);CHKERRQ(ierr); 2151 ierr = PetscFree(rowners);CHKERRQ(ierr); 2152 2153 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2154 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 *newmat = A; 2156 PetscFunctionReturn(0); 2157 } 2158 2159 #undef __FUNCT__ 2160 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2161 /* 2162 Not great since it makes two copies of the submatrix, first an SeqAIJ 2163 in local and then by concatenating the local matrices the end result. 
2164 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2165 */ 2166 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2167 { 2168 PetscErrorCode ierr; 2169 PetscMPIInt rank,size; 2170 int i,m,n,rstart,row,rend,nz,*cwork,j; 2171 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2172 Mat *local,M,Mreuse; 2173 PetscScalar *vwork,*aa; 2174 MPI_Comm comm = mat->comm; 2175 Mat_SeqAIJ *aij; 2176 2177 2178 PetscFunctionBegin; 2179 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2180 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2181 2182 if (call == MAT_REUSE_MATRIX) { 2183 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2184 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2185 local = &Mreuse; 2186 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2187 } else { 2188 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2189 Mreuse = *local; 2190 ierr = PetscFree(local);CHKERRQ(ierr); 2191 } 2192 2193 /* 2194 m - number of local rows 2195 n - number of columns (same on all processors) 2196 rstart - first row in new global matrix generated 2197 */ 2198 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2199 if (call == MAT_INITIAL_MATRIX) { 2200 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2201 ii = aij->i; 2202 jj = aij->j; 2203 2204 /* 2205 Determine the number of non-zeros in the diagonal and off-diagonal 2206 portions of the matrix in order to do correct preallocation 2207 */ 2208 2209 /* first get start and end of "diagonal" columns */ 2210 if (csize == PETSC_DECIDE) { 2211 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2212 if (mglobal == n) { /* square matrix */ 2213 nlocal = m; 2214 } else { 2215 nlocal = n/size + ((n % size) > rank); 2216 } 2217 } else { 2218 nlocal = csize; 2219 } 2220 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2221 rstart = rend - nlocal; 2222 if (rank == size - 1 && rend != n) { 2223 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2224 } 2225 2226 /* next, compute all the lengths */ 2227 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2228 olens = dlens + m; 2229 for (i=0; i<m; i++) { 2230 jend = ii[i+1] - ii[i]; 2231 olen = 0; 2232 dlen = 0; 2233 for (j=0; j<jend; j++) { 2234 if (*jj < rstart || *jj >= rend) olen++; 2235 else dlen++; 2236 jj++; 2237 } 2238 olens[i] = olen; 2239 dlens[i] = dlen; 2240 } 2241 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2242 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2243 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2244 ierr = PetscFree(dlens);CHKERRQ(ierr); 2245 } else { 2246 int ml,nl; 2247 2248 M = *newmat; 2249 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2250 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2251 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2252 /* 2253 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2254 rather than the slower MatSetValues(). 
2255 */ 2256 M->was_assembled = PETSC_TRUE; 2257 M->assembled = PETSC_FALSE; 2258 } 2259 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2260 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2261 ii = aij->i; 2262 jj = aij->j; 2263 aa = aij->a; 2264 for (i=0; i<m; i++) { 2265 row = rstart + i; 2266 nz = ii[i+1] - ii[i]; 2267 cwork = jj; jj += nz; 2268 vwork = aa; aa += nz; 2269 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2270 } 2271 2272 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2273 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 *newmat = M; 2275 2276 /* save submatrix used in processor for next request */ 2277 if (call == MAT_INITIAL_MATRIX) { 2278 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2279 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2280 } 2281 2282 PetscFunctionReturn(0); 2283 } 2284 2285 #undef __FUNCT__ 2286 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2287 /*@C 2288 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2289 (the default parallel PETSc format). For good matrix assembly performance 2290 the user should preallocate the matrix storage by setting the parameters 2291 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2292 performance can be increased by more than a factor of 50. 2293 2294 Collective on MPI_Comm 2295 2296 Input Parameters: 2297 + A - the matrix 2298 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2299 (same value is used for all local rows) 2300 . d_nnz - array containing the number of nonzeros in the various rows of the 2301 DIAGONAL portion of the local submatrix (possibly different for each row) 2302 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2303 The size of this array is equal to the number of local rows, i.e 'm'. 2304 You must leave room for the diagonal entry even if it is zero. 2305 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2306 submatrix (same value is used for all local rows). 2307 - o_nnz - array containing the number of nonzeros in the various rows of the 2308 OFF-DIAGONAL portion of the local submatrix (possibly different for 2309 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2310 structure. The size of this array is equal to the number 2311 of local rows, i.e 'm'. 2312 2313 The AIJ format (also called the Yale sparse matrix format or 2314 compressed row storage), is fully compatible with standard Fortran 77 2315 storage. That is, the stored row and column indices can begin at 2316 either one (as in Fortran) or zero. See the users manual for details. 2317 2318 The user MUST specify either the local or global matrix dimensions 2319 (possibly both). 2320 2321 The parallel matrix is partitioned such that the first m0 rows belong to 2322 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2323 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2324 2325 The DIAGONAL portion of the local submatrix of a processor can be defined 2326 as the submatrix which is obtained by extraction the part corresponding 2327 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2328 first row that belongs to the processor, and r2 is the last row belonging 2329 to the this processor. This is a square mxm matrix. The remaining portion 2330 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 
2331 2332 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2333 2334 By default, this format uses inodes (identical nodes) when possible. 2335 We search for consecutive rows with the same nonzero structure, thereby 2336 reusing matrix information to achieve increased efficiency. 2337 2338 Options Database Keys: 2339 + -mat_aij_no_inode - Do not use inodes 2340 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2341 - -mat_aij_oneindex - Internally use indexing starting at 1 2342 rather than 0. Note that when calling MatSetValues(), 2343 the user still MUST index entries starting at 0! 2344 2345 Example usage: 2346 2347 Consider the following 8x8 matrix with 34 non-zero values, that is 2348 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2349 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2350 as follows: 2351 2352 .vb 2353 1 2 0 | 0 3 0 | 0 4 2354 Proc0 0 5 6 | 7 0 0 | 8 0 2355 9 0 10 | 11 0 0 | 12 0 2356 ------------------------------------- 2357 13 0 14 | 15 16 17 | 0 0 2358 Proc1 0 18 0 | 19 20 21 | 0 0 2359 0 0 0 | 22 23 0 | 24 0 2360 ------------------------------------- 2361 Proc2 25 26 27 | 0 0 28 | 29 0 2362 30 0 0 | 31 32 33 | 0 34 2363 .ve 2364 2365 This can be represented as a collection of submatrices as: 2366 2367 .vb 2368 A B C 2369 D E F 2370 G H I 2371 .ve 2372 2373 Where the submatrices A,B,C are owned by proc0, D,E,F are 2374 owned by proc1, and G,H,I are owned by proc2. 2375 2376 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2377 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2378 The 'M','N' parameters are 8,8, and have the same values on all procs. 2379 2380 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2381 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2382 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2383 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2384 part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ 2385 matrix and [DF] as another SeqAIJ matrix. 2386 2387 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2388 allocated for every row of the local diagonal submatrix, and o_nz 2389 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2390 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2391 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2392 In this case, the values of d_nz,o_nz are: 2393 .vb 2394 proc0 : d_nz = 2, o_nz = 2 2395 proc1 : d_nz = 3, o_nz = 2 2396 proc2 : d_nz = 1, o_nz = 4 2397 .ve 2398 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2399 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2400 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2401 34 values. 2402 2403 When the d_nnz, o_nnz parameters are specified, the storage is specified 2404 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2405 In the above case the values for d_nnz,o_nnz are: 2406 .vb 2407 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2408 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2409 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2410 .ve 2411 Here the space allocated is the sum of all of the above values, i.e. 34, and 2412 hence the pre-allocation is exact.
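   A minimal calling sketch using proc1's rows from the example above (the communicator and
   size arguments are assumed to be set up elsewhere; this is illustrative only):
.vb
      Mat A;
      int d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve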
2413 2414 Level: intermediate 2415 2416 .keywords: matrix, aij, compressed row, sparse, parallel 2417 2418 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2419 @*/ 2420 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2421 { 2422 PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]); 2423 2424 PetscFunctionBegin; 2425 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2426 if (f) { 2427 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2428 } 2429 PetscFunctionReturn(0); 2430 } 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatCreateMPIAIJ" 2434 /*@C 2435 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2436 (the default parallel PETSc format). For good matrix assembly performance 2437 the user should preallocate the matrix storage by setting the parameters 2438 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2439 performance can be increased by more than a factor of 50. 2440 2441 Collective on MPI_Comm 2442 2443 Input Parameters: 2444 + comm - MPI communicator 2445 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2446 This value should be the same as the local size used in creating the 2447 y vector for the matrix-vector product y = Ax. 2448 . n - This value should be the same as the local size used in creating the 2449 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2450 calculated if N is given) For square matrices n is almost always m. 2451 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2452 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2453 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2454 (same value is used for all local rows) 2455 . d_nnz - array containing the number of nonzeros in the various rows of the 2456 DIAGONAL portion of the local submatrix (possibly different for each row) 2457 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2458 The size of this array is equal to the number of local rows, i.e 'm'. 2459 You must leave room for the diagonal entry even if it is zero. 2460 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2461 submatrix (same value is used for all local rows). 2462 - o_nnz - array containing the number of nonzeros in the various rows of the 2463 OFF-DIAGONAL portion of the local submatrix (possibly different for 2464 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2465 structure. The size of this array is equal to the number 2466 of local rows, i.e 'm'. 2467 2468 Output Parameter: 2469 . A - the matrix 2470 2471 Notes: 2472 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2473 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2474 storage requirements for this matrix. 2475 2476 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2477 processor than it must be used on all processors that share the object for 2478 that argument. 2479 2480 The AIJ format (also called the Yale sparse matrix format or 2481 compressed row storage), is fully compatible with standard Fortran 77 2482 storage. That is, the stored row and column indices can begin at 2483 either one (as in Fortran) or zero. See the users manual for details. 
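   A minimal calling sketch (the values 5 and 2 for d_nz and o_nz below are placeholders, not
   recommendations; per-row d_nnz/o_nnz arrays may be passed instead of PETSC_NULL):
.vb
      Mat A;
      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve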
2484 2485 The user MUST specify either the local or global matrix dimensions 2486 (possibly both). 2487 2488 The parallel matrix is partitioned such that the first m0 rows belong to 2489 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2490 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2491 2492 The DIAGONAL portion of the local submatrix of a processor can be defined 2493 as the submatrix which is obtained by extraction the part corresponding 2494 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2495 first row that belongs to the processor, and r2 is the last row belonging 2496 to the this processor. This is a square mxm matrix. The remaining portion 2497 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2498 2499 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2500 2501 When calling this routine with a single process communicator, a matrix of 2502 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2503 type of communicator, use the construction mechanism: 2504 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2505 2506 By default, this format uses inodes (identical nodes) when possible. 2507 We search for consecutive rows with the same nonzero structure, thereby 2508 reusing matrix information to achieve increased efficiency. 2509 2510 Options Database Keys: 2511 + -mat_aij_no_inode - Do not use inodes 2512 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2513 - -mat_aij_oneindex - Internally use indexing starting at 1 2514 rather than 0. Note that when calling MatSetValues(), 2515 the user still MUST index entries starting at 0! 2516 2517 2518 Example usage: 2519 2520 Consider the following 8x8 matrix with 34 non-zero values, that is 2521 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2522 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2523 as follows: 2524 2525 .vb 2526 1 2 0 | 0 3 0 | 0 4 2527 Proc0 0 5 6 | 7 0 0 | 8 0 2528 9 0 10 | 11 0 0 | 12 0 2529 ------------------------------------- 2530 13 0 14 | 15 16 17 | 0 0 2531 Proc1 0 18 0 | 19 20 21 | 0 0 2532 0 0 0 | 22 23 0 | 24 0 2533 ------------------------------------- 2534 Proc2 25 26 27 | 0 0 28 | 29 0 2535 30 0 0 | 31 32 33 | 0 34 2536 .ve 2537 2538 This can be represented as a collection of submatrices as: 2539 2540 .vb 2541 A B C 2542 D E F 2543 G H I 2544 .ve 2545 2546 Where the submatrices A,B,C are owned by proc0, D,E,F are 2547 owned by proc1, G,H,I are owned by proc2. 2548 2549 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2550 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2551 The 'M','N' parameters are 8,8, and have the same values on all procs. 2552 2553 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2554 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2555 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2556 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2557 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2558 matrix, ans [DF] as another SeqAIJ matrix. 2559 2560 When d_nz, o_nz parameters are specified, d_nz storage elements are 2561 allocated for every row of the local diagonal submatrix, and o_nz 2562 storage locations are allocated for every row of the OFF-DIAGONAL submat. 
2563 One way to choose d_nz and o_nz is to use the max nonzerors per local 2564 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2565 In this case, the values of d_nz,o_nz are: 2566 .vb 2567 proc0 : dnz = 2, o_nz = 2 2568 proc1 : dnz = 3, o_nz = 2 2569 proc2 : dnz = 1, o_nz = 4 2570 .ve 2571 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2572 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2573 for proc3. i.e we are using 12+15+10=37 storage locations to store 2574 34 values. 2575 2576 When d_nnz, o_nnz parameters are specified, the storage is specified 2577 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2578 In the above case the values for d_nnz,o_nnz are: 2579 .vb 2580 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2581 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2582 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2583 .ve 2584 Here the space allocated is sum of all the above values i.e 34, and 2585 hence pre-allocation is perfect. 2586 2587 Level: intermediate 2588 2589 .keywords: matrix, aij, compressed row, sparse, parallel 2590 2591 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2592 @*/ 2593 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2594 { 2595 PetscErrorCode ierr; 2596 int size; 2597 2598 PetscFunctionBegin; 2599 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2601 if (size > 1) { 2602 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2603 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2604 } else { 2605 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2606 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2607 } 2608 PetscFunctionReturn(0); 2609 } 2610 2611 #undef __FUNCT__ 2612 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2613 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2614 { 2615 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2616 PetscFunctionBegin; 2617 *Ad = a->A; 2618 *Ao = a->B; 2619 *colmap = a->garray; 2620 PetscFunctionReturn(0); 2621 } 2622 2623 #undef __FUNCT__ 2624 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2625 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2626 { 2627 PetscErrorCode ierr; 2628 int i; 2629 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2630 2631 PetscFunctionBegin; 2632 if (coloring->ctype == IS_COLORING_LOCAL) { 2633 ISColoringValue *allcolors,*colors; 2634 ISColoring ocoloring; 2635 2636 /* set coloring for diagonal portion */ 2637 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2638 2639 /* set coloring for off-diagonal portion */ 2640 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2641 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2642 for (i=0; i<a->B->n; i++) { 2643 colors[i] = allcolors[a->garray[i]]; 2644 } 2645 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2646 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2647 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2648 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2649 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2650 ISColoringValue *colors; 2651 int *larray; 2652 ISColoring ocoloring; 2653 2654 /* set coloring for diagonal portion */ 2655 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2656 for (i=0; i<a->A->n; i++) { 2657 larray[i] = i + 
a->cstart; 2658 } 2659 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2660 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2661 for (i=0; i<a->A->n; i++) { 2662 colors[i] = coloring->colors[larray[i]]; 2663 } 2664 ierr = PetscFree(larray);CHKERRQ(ierr); 2665 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2666 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2667 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2668 2669 /* set coloring for off-diagonal portion */ 2670 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2671 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2672 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2673 for (i=0; i<a->B->n; i++) { 2674 colors[i] = coloring->colors[larray[i]]; 2675 } 2676 ierr = PetscFree(larray);CHKERRQ(ierr); 2677 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2678 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2679 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2680 } else { 2681 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2682 } 2683 2684 PetscFunctionReturn(0); 2685 } 2686 2687 #undef __FUNCT__ 2688 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2689 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2690 { 2691 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2692 PetscErrorCode ierr; 2693 2694 PetscFunctionBegin; 2695 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2696 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2697 PetscFunctionReturn(0); 2698 } 2699 2700 #undef __FUNCT__ 2701 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2702 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2703 { 2704 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2705 PetscErrorCode ierr; 2706 2707 PetscFunctionBegin; 2708 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2709 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2710 PetscFunctionReturn(0); 2711 } 2712 2713 #undef __FUNCT__ 2714 #define __FUNCT__ "MatMerge" 2715 /*@C 2716 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2717 matrices from each processor 2718 2719 Collective on MPI_Comm 2720 2721 Input Parameters: 2722 + comm - the communicators the parallel matrix will live on 2723 . inmat - the input sequential matrices 2724 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2725 2726 Output Parameter: 2727 . outmat - the parallel matrix generated 2728 2729 Level: advanced 2730 2731 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
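   A rough usage sketch (each process is assumed to already hold its own sequential matrix seq,
   all with the same number of columns; note that MatMerge() destroys the input matrix):
.vb
      Mat seq,par;
      /* ... each process assembles its rows into seq ... */
      ierr = MatMerge(comm,seq,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
.ve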
2732 2733 @*/ 2734 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat) 2735 { 2736 PetscErrorCode ierr; 2737 int m,n,i,rstart,nnz,I,*dnz,*onz; 2738 const int *indx; 2739 const PetscScalar *values; 2740 PetscMap columnmap,rowmap; 2741 2742 PetscFunctionBegin; 2743 2744 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2745 if (scall == MAT_INITIAL_MATRIX){ 2746 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2747 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2748 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2749 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2750 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2751 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2752 2753 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2754 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2755 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2756 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2757 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2758 2759 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2760 for (i=0;i<m;i++) { 2761 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2762 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2763 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2764 } 2765 /* This routine will ONLY return MPIAIJ type matrix */ 2766 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2767 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2768 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2769 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2770 2771 } else if (scall == MAT_REUSE_MATRIX){ 2772 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2773 } else { 2774 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2775 } 2776 2777 for (i=0;i<m;i++) { 2778 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2779 I = i + rstart; 2780 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2781 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2782 } 2783 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2784 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2785 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2786 2787 PetscFunctionReturn(0); 2788 } 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatFileSplit" 2792 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2793 { 2794 PetscErrorCode ierr; 2795 PetscMPIInt rank; 2796 int m,N,i,rstart,nnz; 2797 size_t len; 2798 const int *indx; 2799 PetscViewer out; 2800 char *name; 2801 Mat B; 2802 const PetscScalar *values; 2803 2804 PetscFunctionBegin; 2805 2806 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2807 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2808 /* Should this be the type of the diagonal block of A? 
*/ 2809 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2810 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2811 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2812 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2813 for (i=0;i<m;i++) { 2814 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2815 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2816 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2817 } 2818 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2820 2821 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2822 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2823 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2824 sprintf(name,"%s.%d",outfile,rank); 2825 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2826 ierr = PetscFree(name); 2827 ierr = MatView(B,out);CHKERRQ(ierr); 2828 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2829 ierr = MatDestroy(B);CHKERRQ(ierr); 2830 PetscFunctionReturn(0); 2831 } 2832 2833 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */ 2834 PetscMap rowmap; 2835 int nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r; 2836 int *len_sra; /* array of length 2*size, len_sra[i]/len_sra[size+i] 2837 store length of i-th send/recv matrix values */ 2838 MatScalar *abuf_s; 2839 } Mat_Merge_SeqsToMPI; 2840 2841 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2842 #undef __FUNCT__ 2843 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2844 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2845 { 2846 PetscErrorCode ierr; 2847 Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr; 2848 2849 PetscFunctionBegin; 2850 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2851 ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr); 2852 ierr = PetscFree(merge->len_sra);CHKERRQ(ierr); 2853 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2854 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2855 ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr); 2856 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2857 ierr = PetscFree(merge);CHKERRQ(ierr); 2858 2859 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2860 PetscFunctionReturn(0); 2861 } 2862 2863 #include "src/mat/utils/freespace.h" 2864 #undef __FUNCT__ 2865 #define __FUNCT__ "MatMerge_SeqsToMPI" 2866 /*@C 2867 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2868 matrices from each processor 2869 2870 Collective on MPI_Comm 2871 2872 Input Parameters: 2873 + comm - the communicators the parallel matrix will live on 2874 . seqmat - the input sequential matrices 2875 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2876 2877 Output Parameter: 2878 . mpimat - the parallel matrix generated 2879 2880 Level: advanced 2881 2882 Notes: The dimensions of the sequential matrix in each processor 2883 MUST be the same. 
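   A rough usage sketch (seqmat is assumed to have the full global dimensions on every process;
   contributions from different processes to the same entry are added together):
.vb
      Mat seq,mpi;
      ierr = MatMerge_SeqsToMPI(comm,seq,MAT_INITIAL_MATRIX,&mpi);CHKERRQ(ierr);
      /* on a later call with the same nonzero structure the symbolic work is reused */
      ierr = MatMerge_SeqsToMPI(comm,seq,MAT_REUSE_MATRIX,&mpi);CHKERRQ(ierr);
.ve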
2884 2885 @*/ 2886 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,MatReuse scall,Mat *mpimat) 2887 { 2888 PetscErrorCode ierr; 2889 Mat B_seq,B_mpi; 2890 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2891 PetscMPIInt size,rank; 2892 int M=seqmat->m,N=seqmat->n,m,i,j,*owners,*ai=a->i,*aj=a->j,tag,taga,len,len_a,*len_s,*len_sa,proc,*len_r,*len_ra; 2893 int **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj; 2894 MPI_Request *s_waits,*r_waits,*s_waitsa,*r_waitsa; 2895 MPI_Status *status; 2896 MatScalar *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i; 2897 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2898 Mat_Merge_SeqsToMPI *merge; 2899 2900 PetscFunctionBegin; 2901 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2902 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2903 if (scall == MAT_INITIAL_MATRIX){ 2904 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 2905 } else if (scall == MAT_REUSE_MATRIX){ 2906 B_mpi = *mpimat; 2907 merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr; 2908 bi = merge->bi; 2909 bj = merge->bj; 2910 ijbuf_r = merge->ijbuf_r; 2911 } else { 2912 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2913 } 2914 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2915 2916 /* determine the number of messages to send, their lengths */ 2917 /*---------------------------------------------------------*/ 2918 if (scall == MAT_INITIAL_MATRIX){ 2919 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 2920 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 2921 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 2922 ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr); 2923 ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr); 2924 } 2925 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2926 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2927 2928 len_sa = merge->len_sra; 2929 len_ra = merge->len_sra + size; 2930 2931 if (scall == MAT_INITIAL_MATRIX){ 2932 merge->nsend = 0; 2933 len = len_a = 0; 2934 for (proc=0; proc<size; proc++){ 2935 if (proc == rank) continue; 2936 len_s[proc] = len_sa[proc] = 0; 2937 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2938 len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */ 2939 } 2940 if (len_sa[proc]) { 2941 merge->nsend++; 2942 len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc]; 2943 if (len < len_s[proc]) len = len_s[proc]; 2944 if (len_a < len_sa[proc]) len_a = len_sa[proc]; 2945 } 2946 } 2947 2948 /* determine the number and length of messages to receive */ 2949 /*--------------------------------------------------------*/ 2950 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 2951 ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr); 2952 2953 /* post the Irecvs corresponding to these messages */ 2954 /*-------------------------------------------------*/ 2955 /* get new tag to keep the communication clean */ 2956 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr); 2957 ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr); 2958 2959 /* post the sends */ 2960 /*----------------*/ 2961 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2962 ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr); 2963 k = 0; 2964 for (proc=0; proc<size; proc++){ 2965 if 
(!len_s[proc]) continue; 2966 /* form outgoing ij data to be sent to [proc] */ 2967 nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc]; 2968 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2969 anzi = ai[i+1] - ai[i]; 2970 if (i==owners[proc]){ 2971 ijbuf_s[i-owners[proc]] = anzi; 2972 } else { 2973 ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi; 2974 } 2975 for (j=0; j <anzi; j++){ 2976 *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++; 2977 } 2978 } 2979 ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr); 2980 k++; 2981 } 2982 2983 /* receives and sends of ij-structure are complete, deallocate memories */ 2984 /*----------------------------------------------------------------------*/ 2985 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2986 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2987 2988 ierr = PetscFree(ijbuf_s);CHKERRQ(ierr); 2989 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2990 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2991 } 2992 2993 /* send and recv matrix values */ 2994 /*-----------------------------*/ 2995 if (scall == MAT_INITIAL_MATRIX){ 2996 ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr); 2997 for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */ 2998 } 2999 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 3000 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr); 3001 3002 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr); 3003 k = 0; 3004 for (proc=0; proc<size; proc++){ 3005 if (!len_sa[proc]) continue; 3006 /* form outgoing mat value data to be sent to [proc] */ 3007 abuf_i = merge->abuf_s; 3008 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 3009 anzi = ai[i+1] - ai[i]; 3010 for (j=0; j <anzi; j++){ 3011 *abuf_i = *(aa+ai[i]+j); abuf_i++; 3012 } 3013 } 3014 ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr); 3015 k++; 3016 } 3017 3018 ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr); 3019 ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr); 3020 ierr = PetscFree(status);CHKERRQ(ierr); 3021 3022 ierr = PetscFree(s_waitsa);CHKERRQ(ierr); 3023 ierr = PetscFree(r_waitsa);CHKERRQ(ierr); 3024 3025 /* create seq matrix B_seq in each processor */ 3026 /*-------------------------------------------*/ 3027 if (scall == MAT_INITIAL_MATRIX){ 3028 ierr = PetscFree(len_s);CHKERRQ(ierr); 3029 /* allocate bi array and free space for accumulating nonzero column info */ 3030 ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr); 3031 bi[0] = 0; 3032 3033 ierr = PetscMalloc((N+1)*sizeof(int),&lnk);CHKERRQ(ierr); 3034 ierr = PetscLLInitialize(N,N);CHKERRQ(ierr); 3035 3036 /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */ 3037 len = 0; 3038 for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i]; 3039 ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr); 3040 current_space = free_space; 3041 3042 /* determine symbolic info for each row of B_seq */ 3043 for (i=0;i<m;i++) { 3044 bnzi = 0; 3045 /* add local non-zero cols of this proc's seqmat into lnk */ 3046 arow = owners[rank] + i; 3047 anzi = ai[arow+1] - ai[arow]; 3048 aj = a->j + ai[arow]; 3049 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr); 3050 bnzi += nlnk; 3051 /* add received col data into lnk */ 3052 for (k=0; 
k<merge->nrecv; k++){ /* k-th received message */ 3053 /* i-th row */ 3054 if (i == 0){ 3055 anzi = *(ijbuf_r[k]+i); 3056 aj = ijbuf_r[k] + m; 3057 } else { 3058 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); 3059 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); 3060 } 3061 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr); 3062 bnzi += nlnk; 3063 } 3064 3065 /* if free space is not available, make more free space */ 3066 if (current_space->local_remaining<bnzi) { 3067 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3068 nspacedouble++; 3069 } 3070 /* copy data into free space, then initialize lnk */ 3071 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array);CHKERRQ(ierr); 3072 current_space->array += bnzi; 3073 current_space->local_used += bnzi; 3074 current_space->local_remaining -= bnzi; 3075 3076 bi[i+1] = bi[i] + bnzi; 3077 } 3078 ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr); 3079 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3080 3081 ierr = PetscFree(lnk);CHKERRQ(ierr); 3082 ierr = PetscFree(len_r);CHKERRQ(ierr); 3083 3084 /* allocate space for ba */ 3085 ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3086 ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3087 3088 /* put together B_seq */ 3089 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr); 3090 ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */ 3091 3092 /* create symbolic parallel matrix B_mpi by concatinating B_seq */ 3093 /*--------------------------------------------------------------*/ 3094 ierr = MatMerge(comm,B_seq,scall,&B_mpi);CHKERRQ(ierr); 3095 } /* endof if (scall == MAT_INITIAL_MATRIX) */ 3096 3097 /* insert mat values of B_mpi */ 3098 /*----------------------------*/ 3099 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3100 3101 /* set values of ba */ 3102 for (i=0; i<m; i++) { 3103 arow = owners[rank] + i; 3104 bj_i = bj+bi[i]; /* col indices of the i-th row of B_seq */ 3105 bnzi = bi[i+1] - bi[i]; 3106 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3107 3108 /* add local non-zero vals of this proc's seqmat into ba */ 3109 anzi = ai[arow+1] - ai[arow]; 3110 aj = a->j + ai[arow]; 3111 aa = a->a + ai[arow]; 3112 nextaj = 0; 3113 for (j=0; nextaj<anzi; j++){ 3114 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3115 ba_i[j] += aa[nextaj++]; 3116 } 3117 } 3118 3119 /* add received vals into ba */ 3120 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3121 /* i-th row */ 3122 if (i == 0){ 3123 anzi = *(ijbuf_r[k]+i); 3124 aj = ijbuf_r[k] + m; 3125 aa = abuf_r[k]; 3126 } else { 3127 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); 3128 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); 3129 aa = abuf_r[k] + *(ijbuf_r[k]+i-1); 3130 } 3131 nextaj = 0; 3132 for (j=0; nextaj<anzi; j++){ 3133 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3134 ba_i[j] += aa[nextaj++]; 3135 } 3136 } 3137 } 3138 3139 ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3140 } 3141 ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3142 ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3143 3144 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3145 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3146 3147 /* attach the supporting struct to B_mpi for reuse */ 3148 /*-------------------------------------------------*/ 3149 if (scall == MAT_INITIAL_MATRIX){ 3150 B_mpi->spptr = (void*)merge; 3151 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 
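      /* stash the ij structure and receive buffers in the merge context so that a later call
         with MAT_REUSE_MATRIX can skip the symbolic phase and the ij communication */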
3152 merge->bi = bi; 3153 merge->bj = bj; 3154 merge->ijbuf_r = ijbuf_r; 3155 3156 *mpimat = B_mpi; 3157 } 3158 PetscFunctionReturn(0); 3159 } 3160