1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 int n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + 
sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 int i,j,rstart = aij->rstart,rend = aij->rend; 192 int cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 
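  /*
     What follows routes each (im[i],in[j]) entry three ways: rows owned by this
     process go either into the "diagonal" block aij->A (columns in [cstart,cend),
     stored with local column indices) or into the "off-diagonal" block aij->B
     (all other columns, translated through colmap/garray once the matrix has been
     assembled); rows owned by another process are stashed and shipped during
     MatAssemblyBegin/End.  An illustrative sketch of the routing test, using
     hypothetical names grow/gcol for a global row/column index (not part of the build):

         if (grow >= rstart && grow < rend) {
           if (gcol >= cstart && gcol < cend) {
             insert into aij->A at (grow-rstart, gcol-cstart);
           } else {
             insert into aij->B, column mapped via colmap/garray;
           }
         } else {
           stash (grow,gcol,value) for the owning process;
         }
  */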
194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 int nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 int i,j,rstart = aij->rstart,rend = aij->rend; 278 int cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) 
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 int nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 int i,j,rstart,ncols,n,flg; 351 int *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. 
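     ("Disassembling" converts the off-diagonal block B back to global column
      numbering and discards the compressed colmap/garray information, so that
      nonzeros in new off-process columns can be accepted before B is compressed
      again when assembly completes.  The calling pattern that drives this code is,
      schematically,

          MatSetValues(mat,...);                       local and off-process entries
          MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);    ships the stashed entries
          MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);      assembles the A and B parts

      where the "..." stands for the caller's row/column indices and values.)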
*/ 379 /* 380 if nonzero structure of submatrix B cannot change then we know that 381 no processor disassembled thus we can skip this stuff 382 */ 383 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 384 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 385 if (mat->was_assembled && !other_disassembled) { 386 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 } 389 390 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 391 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 392 } 393 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 395 396 if (aij->rowvalues) { 397 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 398 aij->rowvalues = 0; 399 } 400 401 /* used by MatAXPY() */ 402 a->xtoy = 0; b->xtoy = 0; 403 a->XtoY = 0; b->XtoY = 0; 404 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 411 { 412 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 413 PetscErrorCode ierr; 414 415 PetscFunctionBegin; 416 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 417 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatZeroRows_MPIAIJ" 423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 424 { 425 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 426 PetscErrorCode ierr; 427 int i,N,*rows,*owners = l->rowners,size = l->size; 428 int *nprocs,j,idx,nsends,row; 429 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 430 int *rvalues,tag = A->tag,count,base,slen,n,*source; 431 int *lens,imdex,*lrows,*values,rstart=l->rstart; 432 MPI_Comm comm = A->comm; 433 MPI_Request *send_waits,*recv_waits; 434 MPI_Status recv_status,*send_status; 435 IS istmp; 436 PetscTruth found; 437 438 PetscFunctionBegin; 439 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 441 442 /* first count number of contributors to each processor */ 443 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 444 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 445 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 446 for (i=0; i<N; i++) { 447 idx = rows[i]; 448 found = PETSC_FALSE; 449 for (j=0; j<size; j++) { 450 if (idx >= owners[j] && idx < owners[j+1]) { 451 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 452 } 453 } 454 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 455 } 456 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 457 458 /* inform other processors of number of messages and max length*/ 459 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 460 461 /* post receives: */ 462 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 463 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 464 for (i=0; i<nrecvs; i++) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 474 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 475 starts[0] = 0; 476 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 477 
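  /*
     starts[] above is the exclusive prefix sum of the per-process counts
     nprocs[2*j], so starts[j] is the offset in svalues[] at which rows destined
     for process j begin.  The loop below packs the rows bucketed by owner and
     advances starts[] as it goes, which is why the prefix sum is rebuilt
     afterwards before the sends are posted.  A small worked example with
     hypothetical counts (not taken from the code):

         counts  = {2, 0, 3}              rows headed to processes 0,1,2
         starts  = {0, 2, 2}              exclusive prefix sum
         svalues = [ r0 r1 | r2 r3 r4 ]   packed by owning process
  */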
for (i=0; i<N; i++) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 481 482 starts[0] = 0; 483 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 484 count = 0; 485 for (i=0; i<size; i++) { 486 if (nprocs[2*i+1]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the received row indices into local numbering */ 510 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 511 count = 0; 512 for (i=0; i<nrecvs; i++) { 513 values = rvalues + i*nmax; 514 for (j=0; j<lens[i]; j++) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PetscLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use separate 530 code so that MatSetValues() is not called for each diagonal entry, allocating 531 new memory, thus causing lots of mallocs and slowing things down.
532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for (i = 0; i < slen; i++) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatMult_MPIAIJ" 571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 574 PetscErrorCode ierr; 575 int nt; 576 577 PetscFunctionBegin; 578 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 579 if (nt != A->n) { 580 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 581 } 582 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 584 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMultAdd_MPIAIJ" 591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 PetscErrorCode ierr; 595 596 PetscFunctionBegin; 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 PetscErrorCode ierr; 610 611 PetscFunctionBegin; 612 /* do nondiagonal part */ 613 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 614 /* send it on its way */ 615 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 /* do local part */ 617 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 618 /* receive remote parts: note this assumes the values are not actually */ 619 /* inserted in yy until the next line, which is true for my implementation*/ 620 /* but is not perhaps always true. 
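     In matrix terms: with this process's rows split as [A | B] (A the diagonal
     block, B the off-diagonal block), the transpose product is

         yy  =  A^T * xx                    (computed locally)
             +  contributions of B^T * xx   (scattered in reverse with ADD_VALUES
                                             to the processes owning those rows)

     which is why the reverse scatter below must add into yy rather than insert.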
*/ 621 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 629 { 630 MPI_Comm comm; 631 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 632 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 633 IS Me,Notme; 634 PetscErrorCode ierr; 635 int M,N,first,last,*notme,ntids,i; 636 637 PetscFunctionBegin; 638 639 /* Easy test: symmetric diagonal block */ 640 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 641 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 642 if (!*f) PetscFunctionReturn(0); 643 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 644 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 645 if (ntids==1) PetscFunctionReturn(0); 646 647 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 648 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 649 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 650 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 651 for (i=0; i<first; i++) notme[i] = i; 652 for (i=last; i<M; i++) notme[i-last+first] = i; 653 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 654 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 655 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 656 Aoff = Aoffs[0]; 657 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 658 Boff = Boffs[0]; 659 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 660 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 661 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 662 ierr = ISDestroy(Me);CHKERRQ(ierr); 663 ierr = ISDestroy(Notme);CHKERRQ(ierr); 664 665 PetscFunctionReturn(0); 666 } 667 EXTERN_C_END 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 672 { 673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 674 PetscErrorCode ierr; 675 676 PetscFunctionBegin; 677 /* do nondiagonal part */ 678 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 679 /* send it on its way */ 680 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 681 /* do local part */ 682 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 683 /* receive remote parts: note this assumes the values are not actually */ 684 /* inserted in yy until the next line, which is true for my implementation*/ 685 /* but is not perhaps always true. 
*/ 686 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 /* 691 This only works correctly for square matrices where the subblock A->A is the 692 diagonal block 693 */ 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 697 { 698 PetscErrorCode ierr; 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 701 PetscFunctionBegin; 702 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 703 if (a->rstart != a->cstart || a->rend != a->cend) { 704 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 705 } 706 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatScale_MPIAIJ" 712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 719 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatDestroy_MPIAIJ" 725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 726 { 727 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 728 PetscErrorCode ierr; 729 730 PetscFunctionBegin; 731 #if defined(PETSC_USE_LOG) 732 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 733 #endif 734 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 735 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 736 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 737 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 738 #if defined (PETSC_USE_CTABLE) 739 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 740 #else 741 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 742 #endif 743 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 744 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 745 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 746 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 747 ierr = PetscFree(aij);CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatView_MPIAIJ_Binary" 753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 754 { 755 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 756 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 757 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 758 PetscErrorCode ierr; 759 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 760 int fd; 761 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 762 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 763 PetscScalar *column_values; 764 765 PetscFunctionBegin; 766 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 767 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 768 nz = A->nz + B->nz; 769 if (!rank) { 770 header[0] = MAT_FILE_COOKIE; 771 header[1] = mat->M; 772 header[2] = mat->N; 773 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 774 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 775 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 776 /* get largest number of rows any processor has */ 777 rlen = mat->m; 778 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 779 for (i=1; i<size; i++) { 780 rlen = PetscMax(rlen,range[i+1] - range[i]); 781 } 782 } else { 783 ierr = 
MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 784 rlen = mat->m; 785 } 786 787 /* load up the local row counts */ 788 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 789 for (i=0; i<mat->m; i++) { 790 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 791 } 792 793 /* store the row lengths to the file */ 794 if (!rank) { 795 MPI_Status status; 796 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 797 for (i=1; i<size; i++) { 798 rlen = range[i+1] - range[i]; 799 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 800 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 801 } 802 } else { 803 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 804 } 805 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 806 807 /* load up the local column indices */ 808 nzmax = nz; /* )th processor needs space a largest processor needs */ 809 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 810 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 811 cnt = 0; 812 for (i=0; i<mat->m; i++) { 813 for (j=B->i[i]; j<B->i[i+1]; j++) { 814 if ( (col = garray[B->j[j]]) > cstart) break; 815 column_indices[cnt++] = col; 816 } 817 for (k=A->i[i]; k<A->i[i+1]; k++) { 818 column_indices[cnt++] = A->j[k] + cstart; 819 } 820 for (; j<B->i[i+1]; j++) { 821 column_indices[cnt++] = garray[B->j[j]]; 822 } 823 } 824 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 825 826 /* store the column indices to the file */ 827 if (!rank) { 828 MPI_Status status; 829 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 830 for (i=1; i<size; i++) { 831 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 832 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 833 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 834 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 835 } 836 } else { 837 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 838 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 839 } 840 ierr = PetscFree(column_indices);CHKERRQ(ierr); 841 842 /* load up the local column values */ 843 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 844 cnt = 0; 845 for (i=0; i<mat->m; i++) { 846 for (j=B->i[i]; j<B->i[i+1]; j++) { 847 if ( garray[B->j[j]] > cstart) break; 848 column_values[cnt++] = B->a[j]; 849 } 850 for (k=A->i[i]; k<A->i[i+1]; k++) { 851 column_values[cnt++] = A->a[k]; 852 } 853 for (; j<B->i[i+1]; j++) { 854 column_values[cnt++] = B->a[j]; 855 } 856 } 857 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 858 859 /* store the column values to the file */ 860 if (!rank) { 861 MPI_Status status; 862 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 863 for (i=1; i<size; i++) { 864 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 865 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 866 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 867 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 868 } 869 } else { 870 ierr = 
MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 871 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(column_values);CHKERRQ(ierr); 874 PetscFunctionReturn(0); 875 } 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 879 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 880 { 881 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 882 PetscErrorCode ierr; 883 PetscMPIInt rank = aij->rank,size = aij->size; 884 PetscTruth isdraw,iascii,flg,isbinary; 885 PetscViewer sviewer; 886 PetscViewerFormat format; 887 888 PetscFunctionBegin; 889 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 890 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 891 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 892 if (iascii) { 893 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 894 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 895 MatInfo info; 896 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 897 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 899 if (flg) { 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 901 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 902 } else { 903 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 904 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 905 } 906 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 907 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 908 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 909 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 910 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 911 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_INFO) { 914 PetscFunctionReturn(0); 915 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 916 PetscFunctionReturn(0); 917 } 918 } else if (isbinary) { 919 if (size == 1) { 920 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 921 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 922 } else { 923 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 924 } 925 PetscFunctionReturn(0); 926 } else if (isdraw) { 927 PetscDraw draw; 928 PetscTruth isnull; 929 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 930 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 931 } 932 933 if (size == 1) { 934 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 935 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 936 } else { 937 /* assemble the entire matrix onto first processor. 
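      Each process contributes its rows (A-part columns shifted back to global
      numbering, B-part columns translated through garray) to a temporary MATMPIAIJ
      whose rows all live on process 0, so process 0 temporarily holds the whole
      matrix.  That is acceptable for the small matrices one actually prints or
      draws; for large matrices the binary path handled earlier in this routine
      (MatView_MPIAIJ_Binary) avoids this gather.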
*/ 938 Mat A; 939 Mat_SeqAIJ *Aloc; 940 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 941 PetscScalar *a; 942 943 if (!rank) { 944 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 945 } else { 946 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 947 } 948 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 949 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 950 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 951 PetscLogObjectParent(mat,A); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqAIJ*)aij->A->data; 955 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = aij->rstart; 957 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 961 } 962 aj = Aloc->j; 963 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 964 965 /* copy over the B part */ 966 Aloc = (Mat_SeqAIJ*)aij->B->data; 967 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = aij->rstart; 969 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 970 ct = cols; 971 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 972 for (i=0; i<m; i++) { 973 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 974 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 975 } 976 ierr = PetscFree(ct);CHKERRQ(ierr); 977 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 /* 980 Everyone has to call to draw the matrix since the graphics waits are 981 synchronized across all processors that share the PetscDraw object 982 */ 983 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 984 if (!rank) { 985 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 986 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 987 } 988 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 989 ierr = MatDestroy(A);CHKERRQ(ierr); 990 } 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatView_MPIAIJ" 996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 997 { 998 PetscErrorCode ierr; 999 PetscTruth iascii,isdraw,issocket,isbinary; 1000 1001 PetscFunctionBegin; 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1006 if (iascii || isdraw || isbinary || issocket) { 1007 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1008 } else { 1009 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "MatRelax_MPIAIJ" 1018 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1019 { 1020 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1021 PetscErrorCode ierr; 1022 Vec bb1; 1023 PetscScalar mone=-1.0; 1024 1025 PetscFunctionBegin; 1026 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both 
positive",its,lits); 1027 1028 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1029 1030 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1031 if (flag & SOR_ZERO_INITIAL_GUESS) { 1032 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1033 its--; 1034 } 1035 1036 while (its--) { 1037 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1038 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1039 1040 /* update rhs: bb1 = bb - B*x */ 1041 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1042 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1043 1044 /* local sweep */ 1045 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1046 CHKERRQ(ierr); 1047 } 1048 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1049 if (flag & SOR_ZERO_INITIAL_GUESS) { 1050 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1051 its--; 1052 } 1053 while (its--) { 1054 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1055 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1056 1057 /* update rhs: bb1 = bb - B*x */ 1058 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1059 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1060 1061 /* local sweep */ 1062 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1063 CHKERRQ(ierr); 1064 } 1065 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1066 if (flag & SOR_ZERO_INITIAL_GUESS) { 1067 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1068 its--; 1069 } 1070 while (its--) { 1071 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1072 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1073 1074 /* update rhs: bb1 = bb - B*x */ 1075 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1076 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1077 1078 /* local sweep */ 1079 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1080 CHKERRQ(ierr); 1081 } 1082 } else { 1083 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1084 } 1085 1086 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1092 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1093 { 1094 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1095 Mat A = mat->A,B = mat->B; 1096 PetscErrorCode ierr; 1097 PetscReal isend[5],irecv[5]; 1098 1099 PetscFunctionBegin; 1100 info->block_size = 1.0; 1101 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1102 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1103 isend[3] = info->memory; isend[4] = info->mallocs; 1104 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1105 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1106 isend[3] += info->memory; isend[4] += info->mallocs; 1107 if (flag == MAT_LOCAL) { 1108 info->nz_used = isend[0]; 1109 info->nz_allocated = isend[1]; 1110 info->nz_unneeded = isend[2]; 1111 info->memory = isend[3]; 1112 info->mallocs = isend[4]; 1113 } else if (flag == MAT_GLOBAL_MAX) { 1114 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1115 info->nz_used = irecv[0]; 1116 info->nz_allocated = irecv[1]; 1117 info->nz_unneeded = irecv[2]; 1118 info->memory = irecv[3]; 1119 info->mallocs = irecv[4]; 1120 } else if (flag == MAT_GLOBAL_SUM) { 1121 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1122 info->nz_used = irecv[0]; 1123 info->nz_allocated = irecv[1]; 1124 info->nz_unneeded = irecv[2]; 1125 info->memory = irecv[3]; 1126 info->mallocs = irecv[4]; 1127 } 1128 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1129 info->fill_ratio_needed = 0; 1130 info->factor_mallocs = 0; 1131 info->rows_global = (double)matin->M; 1132 info->columns_global = (double)matin->N; 1133 info->rows_local = (double)matin->m; 1134 info->columns_local = (double)matin->N; 1135 1136 PetscFunctionReturn(0); 1137 } 1138 1139 #undef __FUNCT__ 1140 #define __FUNCT__ "MatSetOption_MPIAIJ" 1141 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1142 { 1143 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1144 PetscErrorCode ierr; 1145 1146 PetscFunctionBegin; 1147 switch (op) { 1148 case MAT_NO_NEW_NONZERO_LOCATIONS: 1149 case MAT_YES_NEW_NONZERO_LOCATIONS: 1150 case MAT_COLUMNS_UNSORTED: 1151 case MAT_COLUMNS_SORTED: 1152 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1153 case MAT_KEEP_ZEROED_ROWS: 1154 case MAT_NEW_NONZERO_LOCATION_ERR: 1155 case MAT_USE_INODES: 1156 case MAT_DO_NOT_USE_INODES: 1157 case MAT_IGNORE_ZERO_ENTRIES: 1158 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1159 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1160 break; 1161 case MAT_ROW_ORIENTED: 1162 a->roworiented = PETSC_TRUE; 1163 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1164 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1165 break; 1166 case MAT_ROWS_SORTED: 1167 case MAT_ROWS_UNSORTED: 1168 case MAT_YES_NEW_DIAGONALS: 1169 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1170 break; 1171 case MAT_COLUMN_ORIENTED: 1172 a->roworiented = PETSC_FALSE; 1173 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1174 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1175 break; 1176 case MAT_IGNORE_OFF_PROC_ENTRIES: 1177 a->donotstash = PETSC_TRUE; 1178 break; 1179 case MAT_NO_NEW_DIAGONALS: 1180 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1181 case MAT_SYMMETRIC: 1182 case MAT_STRUCTURALLY_SYMMETRIC: 1183 case MAT_HERMITIAN: 1184 case MAT_SYMMETRY_ETERNAL: 1185 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1186 break; 1187 case MAT_NOT_SYMMETRIC: 1188 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1189 case MAT_NOT_HERMITIAN: 1190 case MAT_NOT_SYMMETRY_ETERNAL: 1191 break; 1192 default: 1193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatGetRow_MPIAIJ" 1200 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1201 { 1202 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1203 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1204 PetscErrorCode ierr; 1205 int i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1206 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1207 int *cmap,*idx_p; 1208 1209 PetscFunctionBegin; 1210 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1211 mat->getrowactive = PETSC_TRUE; 1212 1213 if (!mat->rowvalues && (idx || v)) { 1214 /* 1215 allocate enough space to hold information from the longest row. 
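      (one int and one PetscScalar per nonzero of the longest local row, counting
       both the A and B parts).  The row handed back to the caller is the merge,
       in increasing global column order,

           [ B columns < cstart | A columns + cstart | B columns >= cend ]

       which relies on the A and B parts each keeping their columns sorted.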
1216 */ 1217 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1218 int max = 1,tmp; 1219 for (i=0; i<matin->m; i++) { 1220 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1221 if (max < tmp) { max = tmp; } 1222 } 1223 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1224 mat->rowindices = (int*)(mat->rowvalues + max); 1225 } 1226 1227 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1228 lrow = row - rstart; 1229 1230 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1231 if (!v) {pvA = 0; pvB = 0;} 1232 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1233 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1234 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1235 nztot = nzA + nzB; 1236 1237 cmap = mat->garray; 1238 if (v || idx) { 1239 if (nztot) { 1240 /* Sort by increasing column numbers, assuming A and B already sorted */ 1241 int imark = -1; 1242 if (v) { 1243 *v = v_p = mat->rowvalues; 1244 for (i=0; i<nzB; i++) { 1245 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1246 else break; 1247 } 1248 imark = i; 1249 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1250 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1251 } 1252 if (idx) { 1253 *idx = idx_p = mat->rowindices; 1254 if (imark > -1) { 1255 for (i=0; i<imark; i++) { 1256 idx_p[i] = cmap[cworkB[i]]; 1257 } 1258 } else { 1259 for (i=0; i<nzB; i++) { 1260 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1261 else break; 1262 } 1263 imark = i; 1264 } 1265 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1266 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1267 } 1268 } else { 1269 if (idx) *idx = 0; 1270 if (v) *v = 0; 1271 } 1272 } 1273 *nz = nztot; 1274 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1275 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1281 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1282 { 1283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1284 1285 PetscFunctionBegin; 1286 if (aij->getrowactive == PETSC_FALSE) { 1287 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1288 } 1289 aij->getrowactive = PETSC_FALSE; 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatNorm_MPIAIJ" 1295 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1296 { 1297 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1298 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1299 PetscErrorCode ierr; 1300 int i,j,cstart = aij->cstart; 1301 PetscReal sum = 0.0; 1302 PetscScalar *v; 1303 1304 PetscFunctionBegin; 1305 if (aij->size == 1) { 1306 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1307 } else { 1308 if (type == NORM_FROBENIUS) { 1309 v = amat->a; 1310 for (i=0; i<amat->nz; i++) { 1311 #if defined(PETSC_USE_COMPLEX) 1312 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1313 #else 1314 sum += (*v)*(*v); v++; 1315 #endif 1316 } 1317 v = bmat->a; 1318 for (i=0; i<bmat->nz; i++) { 1319 #if defined(PETSC_USE_COMPLEX) 1320 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1321 #else 1322 sum += (*v)*(*v); v++; 1323 #endif 1324 } 1325 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1326 *norm = sqrt(*norm); 1327 } else if (type == NORM_1) { /* max column 
norm */ 1328 PetscReal *tmp,*tmp2; 1329 int *jj,*garray = aij->garray; 1330 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1331 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1332 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1333 *norm = 0.0; 1334 v = amat->a; jj = amat->j; 1335 for (j=0; j<amat->nz; j++) { 1336 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1337 } 1338 v = bmat->a; jj = bmat->j; 1339 for (j=0; j<bmat->nz; j++) { 1340 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1341 } 1342 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1343 for (j=0; j<mat->N; j++) { 1344 if (tmp2[j] > *norm) *norm = tmp2[j]; 1345 } 1346 ierr = PetscFree(tmp);CHKERRQ(ierr); 1347 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1348 } else if (type == NORM_INFINITY) { /* max row norm */ 1349 PetscReal ntemp = 0.0; 1350 for (j=0; j<aij->A->m; j++) { 1351 v = amat->a + amat->i[j]; 1352 sum = 0.0; 1353 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1354 sum += PetscAbsScalar(*v); v++; 1355 } 1356 v = bmat->a + bmat->i[j]; 1357 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1358 sum += PetscAbsScalar(*v); v++; 1359 } 1360 if (sum > ntemp) ntemp = sum; 1361 } 1362 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1363 } else { 1364 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1365 } 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatTranspose_MPIAIJ" 1372 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1373 { 1374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1375 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1376 PetscErrorCode ierr; 1377 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1378 Mat B; 1379 PetscScalar *array; 1380 1381 PetscFunctionBegin; 1382 if (!matout && M != N) { 1383 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1384 } 1385 1386 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1387 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1388 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1389 1390 /* copy over the A part */ 1391 Aloc = (Mat_SeqAIJ*)a->A->data; 1392 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1393 row = a->rstart; 1394 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1395 for (i=0; i<m; i++) { 1396 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1397 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1398 } 1399 aj = Aloc->j; 1400 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1401 1402 /* copy over the B part */ 1403 Aloc = (Mat_SeqAIJ*)a->B->data; 1404 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1405 row = a->rstart; 1406 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1407 ct = cols; 1408 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1409 for (i=0; i<m; i++) { 1410 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1411 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1412 } 1413 ierr = PetscFree(ct);CHKERRQ(ierr); 1414 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1415 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1416 if (matout) { 1417 *matout = B; 1418 } else { 1419 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1420 } 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1426 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1427 { 1428 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 1429 Mat a = aij->A,b = aij->B; 1430 PetscErrorCode ierr; 1431 int s1,s2,s3; 1432 1433 PetscFunctionBegin; 1434 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1435 if (rr) { 1436 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1437 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1438 /* Overlap communication with computation. */ 1439 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1440 } 1441 if (ll) { 1442 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1443 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1444 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1445 } 1446 /* scale the diagonal block */ 1447 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1448 1449 if (rr) { 1450 /* Do a scatter end and then right scale the off-diagonal block */ 1451 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1452 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1453 } 1454 1455 PetscFunctionReturn(0); 1456 } 1457 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1461 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1462 { 1463 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1464 PetscErrorCode ierr; 1465 1466 PetscFunctionBegin; 1467 if (!a->rank) { 1468 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1475 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1476 { 1477 PetscFunctionBegin; 1478 *bs = 1; 1479 PetscFunctionReturn(0); 1480 } 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1483 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1484 { 1485 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1486 PetscErrorCode ierr; 1487 1488 PetscFunctionBegin; 1489 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatEqual_MPIAIJ" 1495 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1496 { 1497 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1498 Mat a,b,c,d; 1499 PetscTruth flg; 1500 PetscErrorCode ierr; 1501 1502 PetscFunctionBegin; 1503 a = matA->A; b = matA->B; 1504 c = matB->A; d = matB->B; 1505 1506 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1507 if (flg == PETSC_TRUE) { 1508 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1509 } 1510 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatCopy_MPIAIJ" 1516 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1517 { 1518 PetscErrorCode ierr; 1519 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1520 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1521 1522 PetscFunctionBegin; 1523 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1524 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1525 /* because of the column compression in the off-processor part of the matrix a->B, 1526 the number of columns in a->B and b->B may be different, hence we cannot call 1527 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1528 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1529 then copying the submatrices */ 1530 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1531 } else { 1532 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1533 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1540 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1541 { 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #include "petscblaslapack.h" 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatAXPY_MPIAIJ" 1552 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1553 { 1554 PetscErrorCode ierr; 1555 int i; 1556 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1557 PetscBLASInt bnz,one=1; 1558 Mat_SeqAIJ *x,*y; 1559 1560 PetscFunctionBegin; 1561 if (str == SAME_NONZERO_PATTERN) { 1562 x = (Mat_SeqAIJ *)xx->A->data; 1563 y = (Mat_SeqAIJ *)yy->A->data; 1564 bnz = (PetscBLASInt)x->nz; 1565 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1566 x = (Mat_SeqAIJ *)xx->B->data; 1567 y = (Mat_SeqAIJ *)yy->B->data; 1568 bnz = (PetscBLASInt)x->nz; 1569 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1570 } else if (str == SUBSET_NONZERO_PATTERN) { 1571 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1572 1573 x = (Mat_SeqAIJ *)xx->B->data; 1574 y = (Mat_SeqAIJ *)yy->B->data; 1575 if (y->xtoy && y->XtoY != xx->B) { 1576 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1577 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1578 } 1579 if (!y->xtoy) { /* get xtoy */ 1580 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1581 y->XtoY = xx->B; 1582 } 1583 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1584 } else { 1585 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 /* -------------------------------------------------------------------*/ 1591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1592 MatGetRow_MPIAIJ, 1593 MatRestoreRow_MPIAIJ, 1594 MatMult_MPIAIJ, 1595 /* 4*/ MatMultAdd_MPIAIJ, 1596 MatMultTranspose_MPIAIJ, 1597 MatMultTransposeAdd_MPIAIJ, 1598 0, 1599 0, 1600 0, 1601 /*10*/ 0, 1602 0, 1603 0, 1604 MatRelax_MPIAIJ, 1605 MatTranspose_MPIAIJ, 1606 /*15*/ MatGetInfo_MPIAIJ, 1607 MatEqual_MPIAIJ, 1608 MatGetDiagonal_MPIAIJ, 1609 MatDiagonalScale_MPIAIJ, 1610 MatNorm_MPIAIJ, 1611 /*20*/ MatAssemblyBegin_MPIAIJ, 1612 MatAssemblyEnd_MPIAIJ, 1613 0, 1614 MatSetOption_MPIAIJ, 1615 MatZeroEntries_MPIAIJ, 1616 /*25*/ MatZeroRows_MPIAIJ, 1617 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1618 MatLUFactorSymbolic_MPIAIJ_TFS, 1619 #else 1620 0, 1621 #endif 1622 0, 1623 0, 1624 0, 1625 /*30*/ MatSetUpPreallocation_MPIAIJ, 1626 0, 1627 0, 1628 0, 1629 0, 1630 /*35*/ MatDuplicate_MPIAIJ, 1631 0, 1632 0, 1633 0, 1634 0, 1635 /*40*/ MatAXPY_MPIAIJ, 1636 MatGetSubMatrices_MPIAIJ, 1637 MatIncreaseOverlap_MPIAIJ, 1638 MatGetValues_MPIAIJ, 1639 MatCopy_MPIAIJ, 1640 /*45*/ MatPrintHelp_MPIAIJ, 1641 MatScale_MPIAIJ, 1642 0, 1643 0, 1644 0, 1645 /*50*/ MatGetBlockSize_MPIAIJ, 1646 0, 1647 0, 1648 0, 1649 0, 1650 /*55*/ MatFDColoringCreate_MPIAIJ, 1651 0, 1652 MatSetUnfactored_MPIAIJ, 1653 0, 1654 0, 1655 /*60*/ MatGetSubMatrix_MPIAIJ, 1656 
MatDestroy_MPIAIJ, 1657 MatView_MPIAIJ, 1658 MatGetPetscMaps_Petsc, 1659 0, 1660 /*65*/ 0, 1661 0, 1662 0, 1663 0, 1664 0, 1665 /*70*/ 0, 1666 0, 1667 MatSetColoring_MPIAIJ, 1668 MatSetValuesAdic_MPIAIJ, 1669 MatSetValuesAdifor_MPIAIJ, 1670 /*75*/ 0, 1671 0, 1672 0, 1673 0, 1674 0, 1675 /*80*/ 0, 1676 0, 1677 0, 1678 0, 1679 /*84*/ MatLoad_MPIAIJ, 1680 0, 1681 0, 1682 0, 1683 0, 1684 /*89*/ 0, 1685 MatMatMult_MPIAIJ_MPIAIJ, 1686 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1687 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1688 MatPtAP_MPIAIJ_MPIAIJ, 1689 /*94*/ MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1690 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1691 }; 1692 1693 /* ----------------------------------------------------------------------------------------*/ 1694 1695 EXTERN_C_BEGIN 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1698 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1699 { 1700 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1701 PetscErrorCode ierr; 1702 1703 PetscFunctionBegin; 1704 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1705 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 EXTERN_C_END 1709 1710 EXTERN_C_BEGIN 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1713 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1714 { 1715 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1720 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 #include "petscpc.h" 1726 EXTERN_C_BEGIN 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1729 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1730 { 1731 Mat_MPIAIJ *b; 1732 PetscErrorCode ierr; 1733 int i; 1734 1735 PetscFunctionBegin; 1736 B->preallocated = PETSC_TRUE; 1737 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1738 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1739 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1740 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1741 if (d_nnz) { 1742 for (i=0; i<B->m; i++) { 1743 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1744 } 1745 } 1746 if (o_nnz) { 1747 for (i=0; i<B->m; i++) { 1748 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1749 } 1750 } 1751 b = (Mat_MPIAIJ*)B->data; 1752 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1753 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1754 1755 PetscFunctionReturn(0); 1756 } 1757 EXTERN_C_END 1758 1759 /*MC 1760 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1761 1762 Options Database Keys: 1763 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1764 1765 Level: beginner 1766 1767 .seealso: MatCreateMPIAIJ 1768 M*/ 1769 1770 EXTERN_C_BEGIN 1771 #undef __FUNCT__ 1772 #define __FUNCT__ "MatCreate_MPIAIJ" 1773 PetscErrorCode MatCreate_MPIAIJ(Mat B) 1774 { 1775 Mat_MPIAIJ *b; 1776 PetscErrorCode ierr; 1777 int i,size; 1778 1779 PetscFunctionBegin; 1780 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1781 1782 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1783 B->data = (void*)b; 1784 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1785 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1786 B->factor = 0; 1787 B->assembled = PETSC_FALSE; 1788 B->mapping = 0; 1789 1790 B->insertmode = NOT_SET_VALUES; 1791 b->size = size; 1792 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1793 1794 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1795 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1796 1797 /* the information in the maps duplicates the information computed below, eventually 1798 we should remove the duplicate information that is not contained in the maps */ 1799 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1800 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1801 1802 /* build local table of row and column ownerships */ 1803 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1804 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1805 b->cowners = b->rowners + b->size + 2; 1806 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1807 b->rowners[0] = 0; 1808 for (i=2; i<=b->size; i++) { 1809 b->rowners[i] += b->rowners[i-1]; 1810 } 1811 b->rstart = b->rowners[b->rank]; 1812 b->rend = b->rowners[b->rank+1]; 1813 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1814 b->cowners[0] = 0; 1815 for (i=2; i<=b->size; i++) { 1816 b->cowners[i] += b->cowners[i-1]; 1817 } 1818 b->cstart = b->cowners[b->rank]; 1819 b->cend = b->cowners[b->rank+1]; 1820 1821 /* build cache for off array entries formed */ 1822 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1823 b->donotstash = PETSC_FALSE; 1824 b->colmap = 0; 1825 b->garray = 0; 1826 b->roworiented = PETSC_TRUE; 1827 1828 /* stuff used for matrix vector multiply */ 1829 b->lvec = PETSC_NULL; 1830 b->Mvctx = PETSC_NULL; 1831 1832 /* stuff for MatGetRow() */ 1833 b->rowindices = 0; 1834 b->rowvalues = 0; 1835 b->getrowactive = PETSC_FALSE; 1836 1837 /* Explicitly create 2 MATSEQAIJ matrices. 
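b->A holds the diagonal block and is created with the local sizes m by n; b->B holds the off-diagonal part and is created here with the full global column count N, its columns being compressed to the set actually used during assembly (see the column-compression note in MatCopy_MPIAIJ above).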
*/ 1838 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1839 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1840 PetscLogObjectParent(B,b->A); 1841 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1842 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1843 PetscLogObjectParent(B,b->B); 1844 1845 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1846 "MatStoreValues_MPIAIJ", 1847 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1848 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1849 "MatRetrieveValues_MPIAIJ", 1850 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1851 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1852 "MatGetDiagonalBlock_MPIAIJ", 1853 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1854 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1855 "MatIsTranspose_MPIAIJ", 1856 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1857 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1858 "MatMPIAIJSetPreallocation_MPIAIJ", 1859 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1861 "MatDiagonalScaleLocal_MPIAIJ", 1862 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 EXTERN_C_END 1866 1867 /*MC 1868 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1869 1870 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1871 and MATMPIAIJ otherwise. As a result, for single process communicators, 1872 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1873 for communicators controlling multiple processes. It is recommended that you call both of 1874 the above preallocation routines for simplicity. 1875 1876 Options Database Keys: 1877 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1878 1879 Level: beginner 1880 1881 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1882 M*/ 1883 1884 EXTERN_C_BEGIN 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatCreate_AIJ" 1887 PetscErrorCode MatCreate_AIJ(Mat A) 1888 { 1889 PetscErrorCode ierr; 1890 int size; 1891 1892 PetscFunctionBegin; 1893 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1894 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1895 if (size == 1) { 1896 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1897 } else { 1898 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 EXTERN_C_END 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1906 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1907 { 1908 Mat mat; 1909 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1910 PetscErrorCode ierr; 1911 1912 PetscFunctionBegin; 1913 *newmat = 0; 1914 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1915 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1916 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1917 a = (Mat_MPIAIJ*)mat->data; 1918 1919 mat->factor = matin->factor; 1920 mat->assembled = PETSC_TRUE; 1921 mat->insertmode = NOT_SET_VALUES; 1922 mat->preallocated = PETSC_TRUE; 1923 1924 a->rstart = oldmat->rstart; 1925 a->rend = oldmat->rend; 1926 a->cstart = oldmat->cstart; 1927 a->cend = oldmat->cend; 1928 a->size = oldmat->size; 1929 a->rank = oldmat->rank; 1930 a->donotstash = oldmat->donotstash; 1931 a->roworiented = oldmat->roworiented; 1932 a->rowindices = 0; 1933 a->rowvalues = 0; 1934 a->getrowactive = PETSC_FALSE; 1935 1936 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1937 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1938 if (oldmat->colmap) { 1939 #if defined (PETSC_USE_CTABLE) 1940 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1941 #else 1942 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1943 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1944 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1945 #endif 1946 } else a->colmap = 0; 1947 if (oldmat->garray) { 1948 int len; 1949 len = oldmat->B->n; 1950 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1951 PetscLogObjectMemory(mat,len*sizeof(int)); 1952 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1953 } else a->garray = 0; 1954 1955 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1956 PetscLogObjectParent(mat,a->lvec); 1957 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1958 PetscLogObjectParent(mat,a->Mvctx); 1959 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1960 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1961 PetscLogObjectParent(mat,a->A); 1962 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1963 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1964 PetscLogObjectParent(mat,a->B); 1965 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1966 *newmat = mat; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #include "petscsys.h" 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatLoad_MPIAIJ" 1974 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1975 { 1976 Mat A; 1977 PetscScalar *vals,*svals; 1978 
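  /* Strategy: process 0 reads the header, the row lengths, and then the column
     indices and numerical values one processor's worth at a time, shipping each
     piece to its owner; every process uses its scattered row lengths to
     preallocate the diagonal and off-diagonal blocks before inserting its own
     rows with MatSetValues(). */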
MPI_Comm comm = ((PetscObject)viewer)->comm; 1979 MPI_Status status; 1980 PetscErrorCode ierr; 1981 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1982 int i,nz,j,rstart,rend,fd; 1983 int header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1984 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1985 int cend,cstart,n; 1986 1987 PetscFunctionBegin; 1988 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1989 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1990 if (!rank) { 1991 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1992 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1993 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1994 if (header[3] < 0) { 1995 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1996 } 1997 } 1998 1999 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2000 M = header[1]; N = header[2]; 2001 /* determine ownership of all rows */ 2002 m = M/size + ((M % size) > rank); 2003 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2004 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2005 rowners[0] = 0; 2006 for (i=2; i<=size; i++) { 2007 rowners[i] += rowners[i-1]; 2008 } 2009 rstart = rowners[rank]; 2010 rend = rowners[rank+1]; 2011 2012 /* distribute row lengths to all processors */ 2013 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 2014 offlens = ourlens + (rend-rstart); 2015 if (!rank) { 2016 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 2017 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2018 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2019 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2020 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2021 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2022 } else { 2023 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2024 } 2025 2026 if (!rank) { 2027 /* calculate the number of nonzeros on each processor */ 2028 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2029 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2030 for (i=0; i<size; i++) { 2031 for (j=rowners[i]; j< rowners[i+1]; j++) { 2032 procsnz[i] += rowlengths[j]; 2033 } 2034 } 2035 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2036 2037 /* determine max buffer needed and allocate it */ 2038 maxnz = 0; 2039 for (i=0; i<size; i++) { 2040 maxnz = PetscMax(maxnz,procsnz[i]); 2041 } 2042 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2043 2044 /* read in my part of the matrix column indices */ 2045 nz = procsnz[0]; 2046 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2047 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2048 2049 /* read in every one elses and ship off */ 2050 for (i=1; i<size; i++) { 2051 nz = procsnz[i]; 2052 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2053 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2054 } 2055 ierr = PetscFree(cols);CHKERRQ(ierr); 2056 } else { 2057 /* determine buffer space needed for message */ 2058 nz = 0; 2059 for (i=0; i<m; i++) { 2060 nz += ourlens[i]; 2061 } 2062 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2063 2064 /* receive message of column indices*/ 2065 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2066 ierr = 
MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2067 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2068 } 2069 2070 /* determine column ownership if matrix is not square */ 2071 if (N != M) { 2072 n = N/size + ((N % size) > rank); 2073 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2074 cstart = cend - n; 2075 } else { 2076 cstart = rstart; 2077 cend = rend; 2078 n = cend - cstart; 2079 } 2080 2081 /* loop over local rows, determining number of off diagonal entries */ 2082 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2083 jj = 0; 2084 for (i=0; i<m; i++) { 2085 for (j=0; j<ourlens[i]; j++) { 2086 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2087 jj++; 2088 } 2089 } 2090 2091 /* create our matrix */ 2092 for (i=0; i<m; i++) { 2093 ourlens[i] -= offlens[i]; 2094 } 2095 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2096 ierr = MatSetType(A,type);CHKERRQ(ierr); 2097 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2098 2099 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2100 for (i=0; i<m; i++) { 2101 ourlens[i] += offlens[i]; 2102 } 2103 2104 if (!rank) { 2105 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2106 2107 /* read in my part of the matrix numerical values */ 2108 nz = procsnz[0]; 2109 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2110 2111 /* insert into matrix */ 2112 jj = rstart; 2113 smycols = mycols; 2114 svals = vals; 2115 for (i=0; i<m; i++) { 2116 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2117 smycols += ourlens[i]; 2118 svals += ourlens[i]; 2119 jj++; 2120 } 2121 2122 /* read in other processors and ship out */ 2123 for (i=1; i<size; i++) { 2124 nz = procsnz[i]; 2125 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2126 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2127 } 2128 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2129 } else { 2130 /* receive numeric values */ 2131 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2132 2133 /* receive message of values*/ 2134 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2135 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2136 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2137 2138 /* insert into matrix */ 2139 jj = rstart; 2140 smycols = mycols; 2141 svals = vals; 2142 for (i=0; i<m; i++) { 2143 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2144 smycols += ourlens[i]; 2145 svals += ourlens[i]; 2146 jj++; 2147 } 2148 } 2149 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2150 ierr = PetscFree(vals);CHKERRQ(ierr); 2151 ierr = PetscFree(mycols);CHKERRQ(ierr); 2152 ierr = PetscFree(rowners);CHKERRQ(ierr); 2153 2154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 *newmat = A; 2157 PetscFunctionReturn(0); 2158 } 2159 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2162 /* 2163 Not great since it makes two copies of the submatrix, first an SeqAIJ 2164 in local and then by concatenating the local matrices the end result. 
2165 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2166 */ 2167 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2168 { 2169 PetscErrorCode ierr; 2170 PetscMPIInt rank,size; 2171 int i,m,n,rstart,row,rend,nz,*cwork,j; 2172 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2173 Mat *local,M,Mreuse; 2174 PetscScalar *vwork,*aa; 2175 MPI_Comm comm = mat->comm; 2176 Mat_SeqAIJ *aij; 2177 2178 2179 PetscFunctionBegin; 2180 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2182 2183 if (call == MAT_REUSE_MATRIX) { 2184 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2185 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2186 local = &Mreuse; 2187 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2188 } else { 2189 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2190 Mreuse = *local; 2191 ierr = PetscFree(local);CHKERRQ(ierr); 2192 } 2193 2194 /* 2195 m - number of local rows 2196 n - number of columns (same on all processors) 2197 rstart - first row in new global matrix generated 2198 */ 2199 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2200 if (call == MAT_INITIAL_MATRIX) { 2201 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2202 ii = aij->i; 2203 jj = aij->j; 2204 2205 /* 2206 Determine the number of non-zeros in the diagonal and off-diagonal 2207 portions of the matrix in order to do correct preallocation 2208 */ 2209 2210 /* first get start and end of "diagonal" columns */ 2211 if (csize == PETSC_DECIDE) { 2212 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2213 if (mglobal == n) { /* square matrix */ 2214 nlocal = m; 2215 } else { 2216 nlocal = n/size + ((n % size) > rank); 2217 } 2218 } else { 2219 nlocal = csize; 2220 } 2221 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2222 rstart = rend - nlocal; 2223 if (rank == size - 1 && rend != n) { 2224 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2225 } 2226 2227 /* next, compute all the lengths */ 2228 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2229 olens = dlens + m; 2230 for (i=0; i<m; i++) { 2231 jend = ii[i+1] - ii[i]; 2232 olen = 0; 2233 dlen = 0; 2234 for (j=0; j<jend; j++) { 2235 if (*jj < rstart || *jj >= rend) olen++; 2236 else dlen++; 2237 jj++; 2238 } 2239 olens[i] = olen; 2240 dlens[i] = dlen; 2241 } 2242 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2243 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2244 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2245 ierr = PetscFree(dlens);CHKERRQ(ierr); 2246 } else { 2247 int ml,nl; 2248 2249 M = *newmat; 2250 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2251 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2252 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2253 /* 2254 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2255 rather than the slower MatSetValues(). 
2256 */ 2257 M->was_assembled = PETSC_TRUE; 2258 M->assembled = PETSC_FALSE; 2259 } 2260 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2261 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2262 ii = aij->i; 2263 jj = aij->j; 2264 aa = aij->a; 2265 for (i=0; i<m; i++) { 2266 row = rstart + i; 2267 nz = ii[i+1] - ii[i]; 2268 cwork = jj; jj += nz; 2269 vwork = aa; aa += nz; 2270 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2271 } 2272 2273 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2275 *newmat = M; 2276 2277 /* save submatrix used in processor for next request */ 2278 if (call == MAT_INITIAL_MATRIX) { 2279 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2280 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2281 } 2282 2283 PetscFunctionReturn(0); 2284 } 2285 2286 #undef __FUNCT__ 2287 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2288 /*@C 2289 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2290 (the default parallel PETSc format). For good matrix assembly performance 2291 the user should preallocate the matrix storage by setting the parameters 2292 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2293 performance can be increased by more than a factor of 50. 2294 2295 Collective on MPI_Comm 2296 2297 Input Parameters: 2298 + A - the matrix 2299 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2300 (same value is used for all local rows) 2301 . d_nnz - array containing the number of nonzeros in the various rows of the 2302 DIAGONAL portion of the local submatrix (possibly different for each row) 2303 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2304 The size of this array is equal to the number of local rows, i.e 'm'. 2305 You must leave room for the diagonal entry even if it is zero. 2306 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2307 submatrix (same value is used for all local rows). 2308 - o_nnz - array containing the number of nonzeros in the various rows of the 2309 OFF-DIAGONAL portion of the local submatrix (possibly different for 2310 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2311 structure. The size of this array is equal to the number 2312 of local rows, i.e 'm'. 2313 2314 The AIJ format (also called the Yale sparse matrix format or 2315 compressed row storage), is fully compatible with standard Fortran 77 2316 storage. That is, the stored row and column indices can begin at 2317 either one (as in Fortran) or zero. See the users manual for details. 2318 2319 The user MUST specify either the local or global matrix dimensions 2320 (possibly both). 2321 2322 The parallel matrix is partitioned such that the first m0 rows belong to 2323 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2324 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2325 2326 The DIAGONAL portion of the local submatrix of a processor can be defined 2327 as the submatrix which is obtained by extraction the part corresponding 2328 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2329 first row that belongs to the processor, and r2 is the last row belonging 2330 to the this processor. This is a square mxm matrix. The remaining portion 2331 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 
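   Once the owned row range [rstart,rend) of this process is known (for example
   from MatGetOwnershipRange() on a square matrix, where the diagonal columns
   coincide with the owned rows), the d_nnz/o_nnz arrays can be filled by
   counting, for each local row, how many of its column indices fall inside or
   outside that range.  A sketch, in which row_ncols[] and row_cols[][] are
   illustrative names describing the known sparsity pattern:
.vb
      for (i=0; i<m; i++) {
        d_nnz[i] = 0; o_nnz[i] = 0;
        for (j=0; j<row_ncols[i]; j++) {
          if (row_cols[i][j] >= rstart && row_cols[i][j] < rend) d_nnz[i]++;
          else                                                   o_nnz[i]++;
        }
      }
.ve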
2332 2333 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2334 2335 By default, this format uses inodes (identical nodes) when possible. 2336 We search for consecutive rows with the same nonzero structure, thereby 2337 reusing matrix information to achieve increased efficiency. 2338 2339 Options Database Keys: 2340 + -mat_aij_no_inode - Do not use inodes 2341 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2342 - -mat_aij_oneindex - Internally use indexing starting at 1 2343 rather than 0. Note that when calling MatSetValues(), 2344 the user still MUST index entries starting at 0! 2345 2346 Example usage: 2347 2348 Consider the following 8x8 matrix with 34 non-zero values, that is 2349 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2350 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2351 as follows: 2352 2353 .vb 2354 1 2 0 | 0 3 0 | 0 4 2355 Proc0 0 5 6 | 7 0 0 | 8 0 2356 9 0 10 | 11 0 0 | 12 0 2357 ------------------------------------- 2358 13 0 14 | 15 16 17 | 0 0 2359 Proc1 0 18 0 | 19 20 21 | 0 0 2360 0 0 0 | 22 23 0 | 24 0 2361 ------------------------------------- 2362 Proc2 25 26 27 | 0 0 28 | 29 0 2363 30 0 0 | 31 32 33 | 0 34 2364 .ve 2365 2366 This can be represented as a collection of submatrices as: 2367 2368 .vb 2369 A B C 2370 D E F 2371 G H I 2372 .ve 2373 2374 Where the submatrices A,B,C are owned by proc0, D,E,F are 2375 owned by proc1, G,H,I are owned by proc2. 2376 2377 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2378 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2379 The 'M','N' parameters are 8,8, and have the same values on all procs. 2380 2381 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2382 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2383 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2384 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2385 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2386 matrix, ans [DF] as another SeqAIJ matrix. 2387 2388 When d_nz, o_nz parameters are specified, d_nz storage elements are 2389 allocated for every row of the local diagonal submatrix, and o_nz 2390 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2391 One way to choose d_nz and o_nz is to use the max nonzerors per local 2392 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2393 In this case, the values of d_nz,o_nz are: 2394 .vb 2395 proc0 : dnz = 2, o_nz = 2 2396 proc1 : dnz = 3, o_nz = 2 2397 proc2 : dnz = 1, o_nz = 4 2398 .ve 2399 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2400 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2401 for proc3. i.e we are using 12+15+10=37 storage locations to store 2402 34 values. 2403 2404 When d_nnz, o_nnz parameters are specified, the storage is specified 2405 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2406 In the above case the values for d_nnz,o_nnz are: 2407 .vb 2408 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2409 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2410 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2411 .ve 2412 Here the space allocated is sum of all the above values i.e 34, and 2413 hence pre-allocation is perfect. 
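   A typical calling sequence with this routine is (a sketch, with error
   checking omitted and the d_nnz/o_nnz arrays filled as described above):
.vb
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
      (MatSetValues() calls that fill the local rows)
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve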
2414 2415 Level: intermediate 2416 2417 .keywords: matrix, aij, compressed row, sparse, parallel 2418 2419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2420 @*/ 2421 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2422 { 2423 PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]); 2424 2425 PetscFunctionBegin; 2426 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2427 if (f) { 2428 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2429 } 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatCreateMPIAIJ" 2435 /*@C 2436 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2437 (the default parallel PETSc format). For good matrix assembly performance 2438 the user should preallocate the matrix storage by setting the parameters 2439 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2440 performance can be increased by more than a factor of 50. 2441 2442 Collective on MPI_Comm 2443 2444 Input Parameters: 2445 + comm - MPI communicator 2446 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2447 This value should be the same as the local size used in creating the 2448 y vector for the matrix-vector product y = Ax. 2449 . n - This value should be the same as the local size used in creating the 2450 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2451 calculated if N is given) For square matrices n is almost always m. 2452 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2453 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2454 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2455 (same value is used for all local rows) 2456 . d_nnz - array containing the number of nonzeros in the various rows of the 2457 DIAGONAL portion of the local submatrix (possibly different for each row) 2458 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2459 The size of this array is equal to the number of local rows, i.e 'm'. 2460 You must leave room for the diagonal entry even if it is zero. 2461 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2462 submatrix (same value is used for all local rows). 2463 - o_nnz - array containing the number of nonzeros in the various rows of the 2464 OFF-DIAGONAL portion of the local submatrix (possibly different for 2465 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2466 structure. The size of this array is equal to the number 2467 of local rows, i.e 'm'. 2468 2469 Output Parameter: 2470 . A - the matrix 2471 2472 Notes: 2473 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2474 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2475 storage requirements for this matrix. 2476 2477 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2478 processor than it must be used on all processors that share the object for 2479 that argument. 2480 2481 The AIJ format (also called the Yale sparse matrix format or 2482 compressed row storage), is fully compatible with standard Fortran 77 2483 storage. That is, the stored row and column indices can begin at 2484 either one (as in Fortran) or zero. See the users manual for details. 
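   For example, a matrix with its rows distributed by PETSc can be created and
   filled as follows (a sketch; the preallocation values 5 and 2 and the entries
   set inside the loop are illustrative):
.vb
      MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend; i++) {
        (set the entries of global row i with MatSetValues())
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve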
2485 2486 The user MUST specify either the local or global matrix dimensions 2487 (possibly both). 2488 2489 The parallel matrix is partitioned such that the first m0 rows belong to 2490 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2491 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2492 2493 The DIAGONAL portion of the local submatrix of a processor can be defined 2494 as the submatrix which is obtained by extraction the part corresponding 2495 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2496 first row that belongs to the processor, and r2 is the last row belonging 2497 to the this processor. This is a square mxm matrix. The remaining portion 2498 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2499 2500 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2501 2502 When calling this routine with a single process communicator, a matrix of 2503 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2504 type of communicator, use the construction mechanism: 2505 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2506 2507 By default, this format uses inodes (identical nodes) when possible. 2508 We search for consecutive rows with the same nonzero structure, thereby 2509 reusing matrix information to achieve increased efficiency. 2510 2511 Options Database Keys: 2512 + -mat_aij_no_inode - Do not use inodes 2513 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2514 - -mat_aij_oneindex - Internally use indexing starting at 1 2515 rather than 0. Note that when calling MatSetValues(), 2516 the user still MUST index entries starting at 0! 2517 2518 2519 Example usage: 2520 2521 Consider the following 8x8 matrix with 34 non-zero values, that is 2522 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2523 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2524 as follows: 2525 2526 .vb 2527 1 2 0 | 0 3 0 | 0 4 2528 Proc0 0 5 6 | 7 0 0 | 8 0 2529 9 0 10 | 11 0 0 | 12 0 2530 ------------------------------------- 2531 13 0 14 | 15 16 17 | 0 0 2532 Proc1 0 18 0 | 19 20 21 | 0 0 2533 0 0 0 | 22 23 0 | 24 0 2534 ------------------------------------- 2535 Proc2 25 26 27 | 0 0 28 | 29 0 2536 30 0 0 | 31 32 33 | 0 34 2537 .ve 2538 2539 This can be represented as a collection of submatrices as: 2540 2541 .vb 2542 A B C 2543 D E F 2544 G H I 2545 .ve 2546 2547 Where the submatrices A,B,C are owned by proc0, D,E,F are 2548 owned by proc1, G,H,I are owned by proc2. 2549 2550 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2551 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2552 The 'M','N' parameters are 8,8, and have the same values on all procs. 2553 2554 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2555 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2556 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2557 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2558 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2559 matrix, ans [DF] as another SeqAIJ matrix. 2560 2561 When d_nz, o_nz parameters are specified, d_nz storage elements are 2562 allocated for every row of the local diagonal submatrix, and o_nz 2563 storage locations are allocated for every row of the OFF-DIAGONAL submat. 
2564 One way to choose d_nz and o_nz is to use the max nonzerors per local 2565 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2566 In this case, the values of d_nz,o_nz are: 2567 .vb 2568 proc0 : dnz = 2, o_nz = 2 2569 proc1 : dnz = 3, o_nz = 2 2570 proc2 : dnz = 1, o_nz = 4 2571 .ve 2572 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2573 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2574 for proc3. i.e we are using 12+15+10=37 storage locations to store 2575 34 values. 2576 2577 When d_nnz, o_nnz parameters are specified, the storage is specified 2578 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2579 In the above case the values for d_nnz,o_nnz are: 2580 .vb 2581 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2582 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2583 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2584 .ve 2585 Here the space allocated is sum of all the above values i.e 34, and 2586 hence pre-allocation is perfect. 2587 2588 Level: intermediate 2589 2590 .keywords: matrix, aij, compressed row, sparse, parallel 2591 2592 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2593 @*/ 2594 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2595 { 2596 PetscErrorCode ierr; 2597 int size; 2598 2599 PetscFunctionBegin; 2600 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2601 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2602 if (size > 1) { 2603 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2604 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2605 } else { 2606 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2607 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2608 } 2609 PetscFunctionReturn(0); 2610 } 2611 2612 #undef __FUNCT__ 2613 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2614 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2615 { 2616 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2617 PetscFunctionBegin; 2618 *Ad = a->A; 2619 *Ao = a->B; 2620 *colmap = a->garray; 2621 PetscFunctionReturn(0); 2622 } 2623 2624 #undef __FUNCT__ 2625 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2626 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2627 { 2628 PetscErrorCode ierr; 2629 int i; 2630 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2631 2632 PetscFunctionBegin; 2633 if (coloring->ctype == IS_COLORING_LOCAL) { 2634 ISColoringValue *allcolors,*colors; 2635 ISColoring ocoloring; 2636 2637 /* set coloring for diagonal portion */ 2638 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2639 2640 /* set coloring for off-diagonal portion */ 2641 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2642 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2643 for (i=0; i<a->B->n; i++) { 2644 colors[i] = allcolors[a->garray[i]]; 2645 } 2646 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2647 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2648 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2649 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2650 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2651 ISColoringValue *colors; 2652 int *larray; 2653 ISColoring ocoloring; 2654 2655 /* set coloring for diagonal portion */ 2656 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2657 for (i=0; i<a->A->n; i++) { 2658 larray[i] = i + 
a->cstart; 2659 } 2660 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2661 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2662 for (i=0; i<a->A->n; i++) { 2663 colors[i] = coloring->colors[larray[i]]; 2664 } 2665 ierr = PetscFree(larray);CHKERRQ(ierr); 2666 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2667 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2668 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2669 2670 /* set coloring for off-diagonal portion */ 2671 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2672 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2673 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2674 for (i=0; i<a->B->n; i++) { 2675 colors[i] = coloring->colors[larray[i]]; 2676 } 2677 ierr = PetscFree(larray);CHKERRQ(ierr); 2678 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2679 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2680 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2681 } else { 2682 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2683 } 2684 2685 PetscFunctionReturn(0); 2686 } 2687 2688 #undef __FUNCT__ 2689 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2690 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2693 PetscErrorCode ierr; 2694 2695 PetscFunctionBegin; 2696 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2697 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2698 PetscFunctionReturn(0); 2699 } 2700 2701 #undef __FUNCT__ 2702 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2703 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2704 { 2705 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2706 PetscErrorCode ierr; 2707 2708 PetscFunctionBegin; 2709 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2710 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2711 PetscFunctionReturn(0); 2712 } 2713 2714 #undef __FUNCT__ 2715 #define __FUNCT__ "MatMerge" 2716 /*@C 2717 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2718 matrices from each processor 2719 2720 Collective on MPI_Comm 2721 2722 Input Parameters: 2723 + comm - the communicators the parallel matrix will live on 2724 . inmat - the input sequential matrices 2725 . n - number of local columns (or PETSC_DECIDE) 2726 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2727 2728 Output Parameter: 2729 . outmat - the parallel matrix generated 2730 2731 Level: advanced 2732 2733 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
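   The input matrix inmat is destroyed by this routine once its rows have been
   inserted into outmat, so it must not be used afterwards.  A sketch of a
   typical call, where A_loc is an illustrative name for the sequential matrix
   holding this process's rows:
.vb
      MatMerge(comm,A_loc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);
.ve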
2734 2735 @*/ 2736 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2737 { 2738 PetscErrorCode ierr; 2739 int m,N,i,rstart,nnz,I,*dnz,*onz; 2740 const int *indx; 2741 const PetscScalar *values; 2742 PetscMap columnmap,rowmap; 2743 2744 PetscFunctionBegin; 2745 2746 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2747 if (scall == MAT_INITIAL_MATRIX){ 2748 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2749 if (n == PETSC_DECIDE){ 2750 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2751 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2752 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2753 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2754 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2755 } 2756 2757 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2758 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2759 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2760 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2761 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2762 2763 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2764 for (i=0;i<m;i++) { 2765 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2766 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2767 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2768 } 2769 /* This routine will ONLY return MPIAIJ type matrix */ 2770 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2771 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2772 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2773 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2774 2775 } else if (scall == MAT_REUSE_MATRIX){ 2776 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2777 } else { 2778 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2779 } 2780 2781 for (i=0;i<m;i++) { 2782 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2783 I = i + rstart; 2784 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2785 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2786 } 2787 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2788 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2790 2791 PetscFunctionReturn(0); 2792 } 2793 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatFileSplit" 2796 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2797 { 2798 PetscErrorCode ierr; 2799 PetscMPIInt rank; 2800 int m,N,i,rstart,nnz; 2801 size_t len; 2802 const int *indx; 2803 PetscViewer out; 2804 char *name; 2805 Mat B; 2806 const PetscScalar *values; 2807 2808 PetscFunctionBegin; 2809 2810 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2811 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2812 /* Should this be the type of the diagonal block of A? 
*/ 2813 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2814 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2815 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2816 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2817 for (i=0;i<m;i++) { 2818 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2819 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2820 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2821 } 2822 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2823 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2824 2825 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2826 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2827 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2828 sprintf(name,"%s.%d",outfile,rank); 2829 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2830 ierr = PetscFree(name); 2831 ierr = MatView(B,out);CHKERRQ(ierr); 2832 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2833 ierr = MatDestroy(B);CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */ 2838 PetscMap rowmap; 2839 int nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r; 2840 int *len_sra; /* array of length 2*size, len_sra[i]/len_sra[size+i] 2841 store length of i-th send/recv matrix values */ 2842 MatScalar *abuf_s; 2843 } Mat_Merge_SeqsToMPI; 2844 2845 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2848 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2849 { 2850 PetscErrorCode ierr; 2851 Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr; 2852 2853 PetscFunctionBegin; 2854 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2855 ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr); 2856 ierr = PetscFree(merge->len_sra);CHKERRQ(ierr); 2857 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2858 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2859 ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr); 2860 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2861 ierr = PetscFree(merge);CHKERRQ(ierr); 2862 2863 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2864 PetscFunctionReturn(0); 2865 } 2866 2867 #include "src/mat/utils/freespace.h" 2868 #include "petscbt.h" 2869 #undef __FUNCT__ 2870 #define __FUNCT__ "MatMerge_SeqsToMPI" 2871 /*@C 2872 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2873 matrices from each processor 2874 2875 Collective on MPI_Comm 2876 2877 Input Parameters: 2878 + comm - the communicators the parallel matrix will live on 2879 . seqmat - the input sequential matrices 2880 . m - number of local rows (or PETSC_DECIDE) 2881 . n - number of local columns (or PETSC_DECIDE) 2882 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2883 2884 Output Parameter: 2885 . mpimat - the parallel matrix generated 2886 2887 Level: advanced 2888 2889 Notes: The dimensions of the sequential matrix in each processor 2890 MUST be the same. 
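   A sketch of a typical call, where A_seq is an illustrative name for this
   process's sequential contribution (the contributions, all of the same
   dimensions, are summed entry-wise into the result):
.vb
      MatMerge_SeqsToMPI(comm,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);
.ve
   On subsequent calls with the same nonzero pattern, MAT_REUSE_MATRIX may be
   passed together with the previously created A_mpi so that the symbolic and
   communication information attached to it is reused.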
2891 2892 @*/ 2893 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 2894 { 2895 PetscErrorCode ierr; 2896 Mat B_seq,B_mpi; 2897 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2898 PetscMPIInt size,rank; 2899 int M=seqmat->m,N=seqmat->n,i,j,*owners,*ai=a->i,*aj=a->j,tag,taga,len,len_a = 0,*len_s,*len_sa,proc,*len_r,*len_ra; 2900 int **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj; 2901 MPI_Request *s_waits,*r_waits,*s_waitsa,*r_waitsa; 2902 MPI_Status *status; 2903 MatScalar *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i; 2904 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2905 PetscBT lnkbt; 2906 Mat_Merge_SeqsToMPI *merge; 2907 2908 PetscFunctionBegin; 2909 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2910 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2911 if (scall == MAT_INITIAL_MATRIX){ 2912 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 2913 } else if (scall == MAT_REUSE_MATRIX){ 2914 B_mpi = *mpimat; 2915 merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr; 2916 bi = merge->bi; 2917 bj = merge->bj; 2918 ijbuf_r = merge->ijbuf_r; 2919 } else { 2920 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2921 } 2922 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2923 2924 /* determine the number of messages to send, their lengths */ 2925 /*---------------------------------------------------------*/ 2926 if (scall == MAT_INITIAL_MATRIX){ 2927 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 2928 if (m == PETSC_DECIDE) { 2929 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 2930 } else { 2931 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 2932 } 2933 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 2934 ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr); 2935 ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr); 2936 } 2937 if (m == PETSC_DECIDE) ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2938 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2939 2940 len_sa = merge->len_sra; 2941 len_ra = merge->len_sra + size; 2942 2943 if (scall == MAT_INITIAL_MATRIX){ 2944 merge->nsend = 0; 2945 len = len_a = 0; 2946 for (proc=0; proc<size; proc++){ 2947 len_s[proc] = len_sa[proc] = 0; 2948 if (proc == rank) continue; 2949 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2950 len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */ 2951 } 2952 if (len_sa[proc]) { 2953 merge->nsend++; 2954 len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc]; 2955 if (len < len_s[proc]) len = len_s[proc]; 2956 if (len_a < len_sa[proc]) len_a = len_sa[proc]; 2957 } 2958 } 2959 2960 /* determine the number and length of messages to receive */ 2961 /*--------------------------------------------------------*/ 2962 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 2963 ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr); 2964 /* 2965 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] nsend: %d, nrecv: %d\n",rank,merge->nsend,merge->nrecv); 2966 for (i=0; i<merge->nrecv; i++){ 2967 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] expects recv len_r=%d from [%d]\n",rank,len_r[i],merge->id_r[i]); 2968 } 2969 */ 2970 2971 /* post the Irecvs corresponding to these messages */ 2972 /*-------------------------------------------------*/ 2973 /* get new tag to keep the communication clean */ 
2974 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr); 2975 ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr); 2976 2977 /* post the sends */ 2978 /*----------------*/ 2979 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2980 ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr); 2981 k = 0; 2982 for (proc=0; proc<size; proc++){ 2983 if (!len_s[proc]) continue; 2984 /* form outgoing ij data to be sent to [proc] */ 2985 nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc]; 2986 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2987 anzi = ai[i+1] - ai[i]; 2988 if (i==owners[proc]){ 2989 ijbuf_s[i-owners[proc]] = anzi; 2990 } else { 2991 ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi; 2992 } 2993 for (j=0; j <anzi; j++){ 2994 *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++; 2995 } 2996 } 2997 ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr); 2998 k++; 2999 } 3000 3001 /* receives and sends of ij-structure are complete, deallocate memories */ 3002 /*----------------------------------------------------------------------*/ 3003 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 3004 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 3005 3006 ierr = PetscFree(ijbuf_s);CHKERRQ(ierr); 3007 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3008 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3009 } 3010 3011 /* send and recv matrix values */ 3012 /*-----------------------------*/ 3013 if (scall == MAT_INITIAL_MATRIX){ 3014 ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr); 3015 for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */ 3016 } 3017 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 3018 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr); 3019 3020 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr); 3021 k = 0; 3022 for (proc=0; proc<size; proc++){ 3023 if (!len_sa[proc]) continue; 3024 /* form outgoing mat value data to be sent to [proc] */ 3025 abuf_i = merge->abuf_s; 3026 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 3027 anzi = ai[i+1] - ai[i]; 3028 for (j=0; j <anzi; j++){ 3029 *abuf_i = *(aa+ai[i]+j); abuf_i++; 3030 } 3031 } 3032 ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr); 3033 k++; 3034 } 3035 3036 ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr); 3037 ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr); 3038 ierr = PetscFree(status);CHKERRQ(ierr); 3039 3040 ierr = PetscFree(s_waitsa);CHKERRQ(ierr); 3041 ierr = PetscFree(r_waitsa);CHKERRQ(ierr); 3042 3043 /* create seq matrix B_seq in each processor */ 3044 /*-------------------------------------------*/ 3045 if (scall == MAT_INITIAL_MATRIX){ 3046 ierr = PetscFree(len_s);CHKERRQ(ierr); 3047 /* allocate bi array and free space for accumulating nonzero column info */ 3048 ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr); 3049 bi[0] = 0; 3050 3051 /* create and initialize a linked list */ 3052 nlnk = N+1; 3053 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3054 3055 /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */ 3056 len = 0; 3057 for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i]; 3058 ierr = 
GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr); 3059 current_space = free_space; 3060 3061 /* determine symbolic info for each row of B_seq */ 3062 for (i=0;i<m;i++) { 3063 bnzi = 0; 3064 /* add local non-zero cols of this proc's seqmat into lnk */ 3065 arow = owners[rank] + i; 3066 anzi = ai[arow+1] - ai[arow]; 3067 aj = a->j + ai[arow]; 3068 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3069 bnzi += nlnk; 3070 /* add received col data into lnk */ 3071 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3072 /* i-th row */ 3073 if (i == 0){ 3074 anzi = *(ijbuf_r[k]+i); 3075 aj = ijbuf_r[k] + m; 3076 } else { 3077 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); 3078 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); 3079 } 3080 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3081 bnzi += nlnk; 3082 } 3083 3084 /* if free space is not available, make more free space */ 3085 if (current_space->local_remaining<bnzi) { 3086 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3087 nspacedouble++; 3088 } 3089 /* copy data into free space, then initialize lnk */ 3090 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3091 current_space->array += bnzi; 3092 current_space->local_used += bnzi; 3093 current_space->local_remaining -= bnzi; 3094 3095 bi[i+1] = bi[i] + bnzi; 3096 } 3097 ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr); 3098 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3099 3100 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3101 ierr = PetscFree(len_r);CHKERRQ(ierr); 3102 3103 /* allocate space for ba */ 3104 ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3105 ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3106 3107 /* put together B_seq */ 3108 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr); 3109 ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */ 3110 3111 /* create symbolic parallel matrix B_mpi by concatinating B_seq */ 3112 /*--------------------------------------------------------------*/ 3113 ierr = MatMerge(comm,B_seq,n,scall,&B_mpi);CHKERRQ(ierr); 3114 } /* endof if (scall == MAT_INITIAL_MATRIX) */ 3115 3116 /* insert mat values of B_mpi */ 3117 /*----------------------------*/ 3118 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3119 3120 /* set values of ba */ 3121 for (i=0; i<m; i++) { 3122 arow = owners[rank] + i; 3123 bj_i = bj+bi[i]; /* col indices of the i-th row of B_seq */ 3124 bnzi = bi[i+1] - bi[i]; 3125 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3126 3127 /* add local non-zero vals of this proc's seqmat into ba */ 3128 anzi = ai[arow+1] - ai[arow]; 3129 aj = a->j + ai[arow]; 3130 aa = a->a + ai[arow]; 3131 nextaj = 0; 3132 for (j=0; nextaj<anzi; j++){ 3133 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3134 ba_i[j] += aa[nextaj++]; 3135 } 3136 } 3137 3138 /* add received vals into ba */ 3139 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3140 /* i-th row */ 3141 if (i == 0){ 3142 anzi = *(ijbuf_r[k]+i); 3143 aj = ijbuf_r[k] + m; 3144 aa = abuf_r[k]; 3145 } else { 3146 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1); 3147 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1); 3148 aa = abuf_r[k] + *(ijbuf_r[k]+i-1); 3149 } 3150 nextaj = 0; 3151 for (j=0; nextaj<anzi; j++){ 3152 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3153 ba_i[j] += aa[nextaj++]; 3154 } 3155 } 3156 } 3157 3158 ierr = 
MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3159 } 3160 ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3161 ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 3163 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3164 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3165 3166 /* attach the supporting struct to B_mpi for reuse */ 3167 /*-------------------------------------------------*/ 3168 if (scall == MAT_INITIAL_MATRIX){ 3169 B_mpi->spptr = (void*)merge; 3170 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3171 merge->bi = bi; 3172 merge->bj = bj; 3173 merge->ijbuf_r = ijbuf_r; 3174 3175 *mpimat = B_mpi; 3176 } 3177 PetscFunctionReturn(0); 3178 } 3179
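/*
    Example (a sketch, not part of the library): the two sequential blocks that
    make up an MPIAIJ matrix can be accessed with MatMPIAIJGetSeqAIJ() defined
    above; Ad is the diagonal block, Ao the off-diagonal block stored with
    compressed columns, and garray[j] gives the global column number of local
    column j of Ao.

       Mat Ad,Ao;
       int *garray;
       ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/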