1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 58 if (nonew == 1) goto a_noinsert; \ 59 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 60 if (nrow >= rmax) { \ 61 /* there is no extra room in row, therefore enlarge */ \ 62 PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 63 PetscScalar *new_a; \ 64 \ 65 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 66 \ 67 /* malloc new storage space */ \ 68 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \ 69 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 70 new_j = (PetscInt*)(new_a + new_nz); \ 71 new_i = new_j + new_nz; \ 72 \ 73 /* copy over old data into new slots */ \ 74 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 75 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 76 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 77 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 78 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 79 len*sizeof(PetscInt));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 81 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 82 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 83 /* free up old matrix storage */ \ 84 \ 85 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 86 if (!a->singlemalloc) { \ 87 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 88 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 89 } \ 90 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 91 a->singlemalloc = PETSC_TRUE; \ 92 \ 93 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 94 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 95 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 96 a->maxnz += CHUNKSIZE; \ 97 a->reallocs++; \ 98 } \ 99 N = nrow++ - 1; a->nz++; \ 100 /* shift up all the later entries in this row */ \ 101 for (ii=N; ii>=_i; ii--) { \ 102 rp[ii+1] = rp[ii]; \ 103 ap[ii+1] = ap[ii]; \ 104 } \ 105 rp[_i] = col1; \ 106 ap[_i] = value; \ 107 a_noinsert: ; \ 108 ailen[row] = nrow; \ 109 } 110 111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 112 { \ 113 \ 114 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 115 rmax = bimax[row]; nrow = bilen[row]; \ 116 col1 = col - shift; \ 117 \ 118 low = 0; high = nrow; \ 119 while (high-low > 5) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > col) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > col1) break; \ 126 if (rp[_i] == col1) { \ 127 if (addv == ADD_VALUES) ap[_i] += value; \ 128 else ap[_i] = value; \ 129 goto b_noinsert; \ 130 } \ 131 } \ 132 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 133 if (nonew == 1) goto b_noinsert; \ 134 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 if (nrow >= rmax) { \ 136 /* there is no extra room in row, therefore enlarge */ \ 137 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 138 PetscScalar *new_a; \ 139 \ 140 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 141 \ 142 /* malloc new storage space */ \ 143 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 144 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 145 new_j = (PetscInt*)(new_a + new_nz); \ 146 new_i = new_j + new_nz; \ 147 \ 148 /* copy over old data into new slots */ \ 149 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 150 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 151 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 152 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 153 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 154 len*sizeof(PetscInt));CHKERRQ(ierr); \ 155 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 157 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 158 /* free up old matrix storage */ \ 159 \ 160 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 161 if (!b->singlemalloc) { \ 162 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 163 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 164 } \ 165 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 166 b->singlemalloc = PETSC_TRUE; \ 167 \ 168 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 169 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 170 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 171 b->maxnz += CHUNKSIZE; \ 172 b->reallocs++; \ 173 } \ 174 N = nrow++ - 1; b->nz++; \ 175 /* shift up all the later entries in this row */ \ 176 for (ii=N; ii>=_i; ii--) { \ 177 rp[ii+1] = rp[ii]; \ 178 ap[ii+1] = ap[ii]; \ 179 } \ 180 rp[_i] = col1; \ 181 ap[_i] = value; \ 182 b_noinsert: ; \ 183 bilen[row] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIAIJ" 188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 191 PetscScalar value; 192 PetscErrorCode ierr; 193 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 194 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 195 PetscTruth roworiented = aij->roworiented; 196 197 /* Some Variables required in the macro */ 198 Mat A = aij->A; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 200 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 201 PetscScalar *aa = a->a; 202 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 203 Mat B = aij->B; 204 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 205 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 206 PetscScalar *ba = b->a; 207 208 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 209 PetscInt nonew = a->nonew,shift=0; 210 PetscScalar *ap; 211 212 PetscFunctionBegin; 213 for (i=0; i<m; i++) { 214 if (im[i] < 0) continue; 215 #if defined(PETSC_USE_DEBUG) 216 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 217 #endif 218 if (im[i] >= rstart && im[i] < rend) { 219 row = im[i] - rstart; 220 for (j=0; j<n; j++) { 221 if (in[j] >= cstart && in[j] < cend){ 222 col = in[j] - cstart; 223 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 224 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 225 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 226 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 227 } else if (in[j] < 0) continue; 228 #if defined(PETSC_USE_DEBUG) 229 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 230 #endif 231 else { 232 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 234 if (mat->was_assembled) { 235 if (!aij->colmap) { 236 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 240 col--; 241 #else 242 col = aij->colmap[in[j]] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 245 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 248 B = aij->B; 249 b = (Mat_SeqAIJ*)B->data; 250 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 251 ba = b->a; 252 } 253 } else col = in[j]; 254 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (!aij->donotstash) { 260 if (roworiented) { 261 if (ignorezeroentries && v[i*n] == 0.0) continue; 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 263 } else { 264 if (ignorezeroentries && v[i] == 0.0) continue; 265 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatGetValues_MPIAIJ" 275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 280 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 281 282 PetscFunctionBegin; 283 for (i=0; i<m; i++) { 284 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 285 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 286 if (idxm[i] >= rstart && idxm[i] < rend) { 287 row = idxm[i] - rstart; 288 for (j=0; j<n; j++) { 289 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 290 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 291 if (idxn[j] >= cstart && idxn[j] < cend){ 292 col = idxn[j] - cstart; 293 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 294 } else { 295 if (!aij->colmap) { 296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 297 } 298 #if defined (PETSC_USE_CTABLE) 299 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 300 col --; 301 #else 302 col = aij->colmap[idxn[j]] - 1; 303 #endif 304 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 305 else { 306 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 307 } 308 } 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 320 { 321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 322 PetscErrorCode ierr; 323 PetscInt nstash,reallocs; 324 InsertMode addv; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 PetscFunctionReturn(0); 329 } 330 331 /* make sure all processors are either in INSERTMODE or ADDMODE */ 332 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 333 if (addv == (ADD_VALUES|INSERT_VALUES)) { 334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 335 } 336 mat->insertmode = addv; /* in case this processor had no cache */ 337 338 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 339 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 340 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 347 { 348 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 349 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 350 PetscErrorCode ierr; 351 PetscMPIInt n; 352 PetscInt i,j,rstart,ncols,flg; 353 PetscInt *row,*col,other_disassembled; 354 PetscScalar *val; 355 InsertMode addv = mat->insertmode; 356 357 PetscFunctionBegin; 358 if (!aij->donotstash) { 359 while (1) { 360 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 361 if (!flg) break; 362 363 for (i=0; i<n;) { 364 /* Now identify the consecutive vals belonging to the same row */ 365 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 366 if (j < n) ncols = j-i; 367 else ncols = n-i; 368 /* Now assemble all these values with a single function call */ 369 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 370 i = j; 371 } 372 } 373 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 374 } 375 a->compressedrow.use = PETSC_FALSE; 376 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 377 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 378 379 /* determine if any processor has disassembled, if so we must 380 also disassemble ourselfs, in order that we may reassemble. */ 381 /* 382 if nonzero structure of submatrix B cannot change then we know that 383 no processor disassembled thus we can skip this stuff 384 */ 385 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 386 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 387 if (mat->was_assembled && !other_disassembled) { 388 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 389 /* reaccess the b because aij->B was changed */ 390 b = (Mat_SeqAIJ *)aij->B->data; 391 } 392 } 393 394 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 395 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 396 } 397 398 b->inode.use = PETSC_FALSE; 399 b->compressedrow.use = PETSC_TRUE; 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNCT__ 416 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 417 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 418 { 419 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 420 PetscErrorCode ierr; 421 422 PetscFunctionBegin; 423 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 424 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatZeroRows_MPIAIJ" 430 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 431 { 432 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 433 PetscErrorCode ierr; 434 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 435 PetscInt i,N,*rows,*owners = l->rowners; 436 PetscInt *nprocs,j,idx,nsends,row; 437 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 438 PetscInt *rvalues,count,base,slen,*source; 439 PetscInt *lens,*lrows,*values,rstart=l->rstart; 440 MPI_Comm comm = A->comm; 441 MPI_Request *send_waits,*recv_waits; 442 MPI_Status recv_status,*send_status; 443 IS istmp; 444 PetscTruth found; 445 446 PetscFunctionBegin; 447 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 448 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 449 450 /* first count number of contributors to each processor */ 451 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 452 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 453 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 454 for (i=0; i<N; i++) { 455 idx = rows[i]; 456 found = PETSC_FALSE; 457 for (j=0; j<size; j++) { 458 if (idx >= owners[j] && idx < owners[j+1]) { 459 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 460 } 461 } 462 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 463 } 464 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 465 466 /* inform other processors of number of messages and max length*/ 467 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 468 469 /* post receives: */ 470 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 471 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 472 for (i=0; i<nrecvs; i++) { 473 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 474 } 475 476 /* do sends: 477 1) starts[i] gives the starting index in svalues for stuff going to 478 the ith processor 479 */ 480 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 481 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 482 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 483 starts[0] = 0; 484 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 485 for (i=0; i<N; i++) { 486 svalues[starts[owner[i]]++] = rows[i]; 487 } 488 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 489 490 starts[0] = 0; 491 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 492 count = 0; 493 for (i=0; i<size; i++) { 494 if (nprocs[2*i+1]) { 495 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 496 } 497 } 498 ierr = PetscFree(starts);CHKERRQ(ierr); 499 500 base = owners[rank]; 501 502 /* wait on receives */ 503 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 504 source = lens + nrecvs; 505 count = nrecvs; slen = 0; 506 while (count) { 507 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 508 /* unpack receives into our local space */ 509 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 510 source[imdex] = recv_status.MPI_SOURCE; 511 lens[imdex] = n; 512 slen += n; 513 count--; 514 } 515 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 516 517 /* move the data into the send scatter */ 518 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 519 count = 0; 520 for (i=0; i<nrecvs; i++) { 521 values = rvalues + i*nmax; 522 for (j=0; j<lens[i]; j++) { 523 lrows[count++] = values[j] - base; 524 } 525 } 526 ierr = PetscFree(rvalues);CHKERRQ(ierr); 527 ierr = PetscFree(lens);CHKERRQ(ierr); 528 ierr = PetscFree(owner);CHKERRQ(ierr); 529 ierr = PetscFree(nprocs);CHKERRQ(ierr); 530 531 /* actually zap the local rows */ 532 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 533 PetscLogObjectParent(A,istmp); 534 535 /* 536 Zero the required rows. If the "diagonal block" of the matrix 537 is square and the user wishes to set the diagonal we use seperate 538 code so that MatSetValues() is not called for each diagonal allocating 539 new memory, thus calling lots of mallocs and slowing things down. 540 541 Contributed by: Mathew Knepley 542 */ 543 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 544 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 545 if (diag && (l->A->M == l->A->N)) { 546 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 547 } else if (diag) { 548 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 549 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 550 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 552 } 553 for (i = 0; i < slen; i++) { 554 row = lrows[i] + rstart; 555 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 556 } 557 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 } else { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 } 562 ierr = ISDestroy(istmp);CHKERRQ(ierr); 563 ierr = PetscFree(lrows);CHKERRQ(ierr); 564 565 /* wait on sends */ 566 if (nsends) { 567 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 568 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 569 ierr = PetscFree(send_status);CHKERRQ(ierr); 570 } 571 ierr = PetscFree(send_waits);CHKERRQ(ierr); 572 ierr = PetscFree(svalues);CHKERRQ(ierr); 573 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatMult_MPIAIJ" 579 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 580 { 581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 582 PetscErrorCode ierr; 583 PetscInt nt; 584 585 PetscFunctionBegin; 586 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 587 if (nt != A->n) { 588 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 589 } 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultAdd_MPIAIJ" 599 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 PetscErrorCode ierr; 603 604 PetscFunctionBegin; 605 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 606 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 607 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 608 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 614 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 615 { 616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 617 PetscErrorCode ierr; 618 619 PetscFunctionBegin; 620 /* do nondiagonal part */ 621 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 622 /* send it on its way */ 623 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 /* do local part */ 625 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 626 /* receive remote parts: note this assumes the values are not actually */ 627 /* inserted in yy until the next line, which is true for my implementation*/ 628 /* but is not perhaps always true. */ 629 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 EXTERN_C_BEGIN 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 636 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 637 { 638 MPI_Comm comm; 639 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 640 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 641 IS Me,Notme; 642 PetscErrorCode ierr; 643 PetscInt M,N,first,last,*notme,i; 644 PetscMPIInt size; 645 646 PetscFunctionBegin; 647 648 /* Easy test: symmetric diagonal block */ 649 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 650 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 651 if (!*f) PetscFunctionReturn(0); 652 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 653 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 654 if (size == 1) PetscFunctionReturn(0); 655 656 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 657 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 658 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 659 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 660 for (i=0; i<first; i++) notme[i] = i; 661 for (i=last; i<M; i++) notme[i-last+first] = i; 662 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 663 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 664 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 665 Aoff = Aoffs[0]; 666 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 667 Boff = Boffs[0]; 668 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 669 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 670 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 671 ierr = ISDestroy(Me);CHKERRQ(ierr); 672 ierr = ISDestroy(Notme);CHKERRQ(ierr); 673 674 PetscFunctionReturn(0); 675 } 676 EXTERN_C_END 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 680 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 681 { 682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 683 PetscErrorCode ierr; 684 685 PetscFunctionBegin; 686 /* do nondiagonal part */ 687 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 688 /* send it on its way */ 689 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 690 /* do local part */ 691 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 692 /* receive remote parts: note this assumes the values are not actually */ 693 /* inserted in yy until the next line, which is true for my implementation*/ 694 /* but is not perhaps always true. */ 695 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } 698 699 /* 700 This only works correctly for square matrices where the subblock A->A is the 701 diagonal block 702 */ 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 705 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 706 { 707 PetscErrorCode ierr; 708 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 709 710 PetscFunctionBegin; 711 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 712 if (a->rstart != a->cstart || a->rend != a->cend) { 713 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 714 } 715 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 #undef __FUNCT__ 720 #define __FUNCT__ "MatScale_MPIAIJ" 721 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 722 { 723 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 724 PetscErrorCode ierr; 725 726 PetscFunctionBegin; 727 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 728 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatDestroy_MPIAIJ" 734 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 735 { 736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 #if defined(PETSC_USE_LOG) 741 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 742 #endif 743 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 744 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 745 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 746 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 747 #if defined (PETSC_USE_CTABLE) 748 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 749 #else 750 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 751 #endif 752 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 753 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 754 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 755 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 756 ierr = PetscFree(aij);CHKERRQ(ierr); 757 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 762 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 763 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 764 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatView_MPIAIJ_Binary" 770 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 771 { 772 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 773 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 774 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 775 PetscErrorCode ierr; 776 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 777 int fd; 778 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 779 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 780 PetscScalar *column_values; 781 782 PetscFunctionBegin; 783 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 784 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 785 nz = A->nz + B->nz; 786 if (!rank) { 787 header[0] = MAT_FILE_COOKIE; 788 header[1] = mat->M; 789 header[2] = mat->N; 790 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 791 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 792 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 793 /* get largest number of rows any processor has */ 794 rlen = mat->m; 795 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 796 for (i=1; i<size; i++) { 797 rlen = PetscMax(rlen,range[i+1] - range[i]); 798 } 799 } else { 800 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 801 rlen = mat->m; 802 } 803 804 /* load up the local row counts */ 805 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 806 for (i=0; i<mat->m; i++) { 807 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 808 } 809 810 /* store the row lengths to the file */ 811 if (!rank) { 812 MPI_Status status; 813 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 814 for (i=1; i<size; i++) { 815 rlen = range[i+1] - range[i]; 816 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 817 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 818 } 819 } else { 820 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 821 } 822 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 823 824 /* load up the local column indices */ 825 nzmax = nz; /* )th processor needs space a largest processor needs */ 826 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 827 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 828 cnt = 0; 829 for (i=0; i<mat->m; i++) { 830 for (j=B->i[i]; j<B->i[i+1]; j++) { 831 if ( (col = garray[B->j[j]]) > cstart) break; 832 column_indices[cnt++] = col; 833 } 834 for (k=A->i[i]; k<A->i[i+1]; k++) { 835 column_indices[cnt++] = A->j[k] + cstart; 836 } 837 for (; j<B->i[i+1]; j++) { 838 column_indices[cnt++] = garray[B->j[j]]; 839 } 840 } 841 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 842 843 /* store the column indices to the file */ 844 if (!rank) { 845 MPI_Status status; 846 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 847 for (i=1; i<size; i++) { 848 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 849 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 850 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 851 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 852 } 853 } else { 854 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 855 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 856 } 857 ierr = PetscFree(column_indices);CHKERRQ(ierr); 858 859 /* load up the local column values */ 860 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 861 cnt = 0; 862 for (i=0; i<mat->m; i++) { 863 for (j=B->i[i]; j<B->i[i+1]; j++) { 864 if ( garray[B->j[j]] > cstart) break; 865 column_values[cnt++] = B->a[j]; 866 } 867 for (k=A->i[i]; k<A->i[i+1]; k++) { 868 column_values[cnt++] = A->a[k]; 869 } 870 for (; j<B->i[i+1]; j++) { 871 column_values[cnt++] = B->a[j]; 872 } 873 } 874 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 875 876 /* store the column values to the file */ 877 if (!rank) { 878 MPI_Status status; 879 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 880 for (i=1; i<size; i++) { 881 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 882 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 883 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 884 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 885 } 886 } else { 887 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 888 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 889 } 890 ierr = PetscFree(column_values);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 896 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 897 { 898 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 899 PetscErrorCode ierr; 900 PetscMPIInt rank = aij->rank,size = aij->size; 901 PetscTruth isdraw,iascii,isbinary; 902 PetscViewer sviewer; 903 PetscViewerFormat format; 904 905 PetscFunctionBegin; 906 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 907 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 908 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 909 if (iascii) { 910 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 911 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 912 MatInfo info; 913 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 914 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 915 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 916 if (!a->inode.size) { 917 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 918 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 919 } else { 920 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 921 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 922 } 923 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 924 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 925 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 926 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 927 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 928 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } else if (format == PETSC_VIEWER_ASCII_INFO) { 931 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 932 if (a->inode.size) { 933 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr); 934 } else { 935 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 936 } 937 PetscFunctionReturn(0); 938 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 939 PetscFunctionReturn(0); 940 } 941 } else if (isbinary) { 942 if (size == 1) { 943 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 944 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 945 } else { 946 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } else if (isdraw) { 950 PetscDraw draw; 951 PetscTruth isnull; 952 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 953 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 954 } 955 956 if (size == 1) { 957 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 958 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 959 } else { 960 /* assemble the entire matrix onto first processor. */ 961 Mat A; 962 Mat_SeqAIJ *Aloc; 963 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 964 PetscScalar *a; 965 966 if (!rank) { 967 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 968 } else { 969 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 970 } 971 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 972 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 973 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 974 PetscLogObjectParent(mat,A); 975 976 /* copy over the A part */ 977 Aloc = (Mat_SeqAIJ*)aij->A->data; 978 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 979 row = aij->rstart; 980 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 981 for (i=0; i<m; i++) { 982 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 983 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 984 } 985 aj = Aloc->j; 986 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 987 988 /* copy over the B part */ 989 Aloc = (Mat_SeqAIJ*)aij->B->data; 990 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 991 row = aij->rstart; 992 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 993 ct = cols; 994 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 995 for (i=0; i<m; i++) { 996 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 997 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 998 } 999 ierr = PetscFree(ct);CHKERRQ(ierr); 1000 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1001 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1002 /* 1003 Everyone has to call to draw the matrix since the graphics waits are 1004 synchronized across all processors that share the PetscDraw object 1005 */ 1006 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1007 if (!rank) { 1008 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1009 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1010 } 1011 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1012 ierr = MatDestroy(A);CHKERRQ(ierr); 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatView_MPIAIJ" 1019 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1020 { 1021 PetscErrorCode ierr; 1022 PetscTruth iascii,isdraw,issocket,isbinary; 1023 1024 PetscFunctionBegin; 1025 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1026 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1027 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1028 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1029 if (iascii || isdraw || isbinary || issocket) { 1030 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1031 } else { 1032 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 1037 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatRelax_MPIAIJ" 1041 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1042 { 1043 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1044 PetscErrorCode ierr; 1045 Vec bb1; 1046 PetscScalar mone=-1.0; 1047 1048 PetscFunctionBegin; 1049 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1050 1051 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1052 1053 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1054 if (flag & SOR_ZERO_INITIAL_GUESS) { 1055 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1056 its--; 1057 } 1058 1059 while (its--) { 1060 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1061 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1062 1063 /* update rhs: bb1 = bb - B*x */ 1064 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1065 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1066 1067 /* local sweep */ 1068 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1069 CHKERRQ(ierr); 1070 } 1071 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1072 if (flag & SOR_ZERO_INITIAL_GUESS) { 1073 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1074 its--; 1075 } 1076 while (its--) { 1077 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1078 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1079 1080 /* update rhs: bb1 = bb - B*x */ 1081 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1082 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1083 1084 /* local sweep */ 1085 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1086 CHKERRQ(ierr); 1087 } 1088 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1089 if (flag & SOR_ZERO_INITIAL_GUESS) { 1090 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1091 its--; 1092 } 1093 while (its--) { 1094 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1096 1097 /* update rhs: bb1 = bb - B*x */ 1098 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1099 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1100 1101 /* local sweep */ 1102 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1103 CHKERRQ(ierr); 1104 } 1105 } else { 1106 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1107 } 1108 1109 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1115 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1116 { 1117 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1118 Mat A = mat->A,B = mat->B; 1119 PetscErrorCode ierr; 1120 PetscReal isend[5],irecv[5]; 1121 1122 PetscFunctionBegin; 1123 info->block_size = 1.0; 1124 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1125 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1126 isend[3] = info->memory; isend[4] = info->mallocs; 1127 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1128 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1129 isend[3] += info->memory; isend[4] += info->mallocs; 1130 if (flag == MAT_LOCAL) { 1131 info->nz_used = isend[0]; 1132 info->nz_allocated = isend[1]; 1133 info->nz_unneeded = isend[2]; 1134 info->memory = isend[3]; 1135 info->mallocs = isend[4]; 1136 } else if (flag == MAT_GLOBAL_MAX) { 1137 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1138 info->nz_used = irecv[0]; 1139 info->nz_allocated = irecv[1]; 1140 info->nz_unneeded = irecv[2]; 1141 info->memory = irecv[3]; 1142 info->mallocs = irecv[4]; 1143 } else if (flag == MAT_GLOBAL_SUM) { 1144 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1145 info->nz_used = irecv[0]; 1146 info->nz_allocated = irecv[1]; 1147 info->nz_unneeded = irecv[2]; 1148 info->memory = irecv[3]; 1149 info->mallocs = irecv[4]; 1150 } 1151 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1152 info->fill_ratio_needed = 0; 1153 info->factor_mallocs = 0; 1154 info->rows_global = (double)matin->M; 1155 info->columns_global = (double)matin->N; 1156 info->rows_local = (double)matin->m; 1157 info->columns_local = (double)matin->N; 1158 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatSetOption_MPIAIJ" 1164 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 switch (op) { 1171 case MAT_NO_NEW_NONZERO_LOCATIONS: 1172 case MAT_YES_NEW_NONZERO_LOCATIONS: 1173 case MAT_COLUMNS_UNSORTED: 1174 case MAT_COLUMNS_SORTED: 1175 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1176 case MAT_KEEP_ZEROED_ROWS: 1177 case MAT_NEW_NONZERO_LOCATION_ERR: 1178 case MAT_USE_INODES: 1179 case MAT_DO_NOT_USE_INODES: 1180 case MAT_IGNORE_ZERO_ENTRIES: 1181 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1182 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1183 break; 1184 case MAT_ROW_ORIENTED: 1185 a->roworiented = PETSC_TRUE; 1186 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1187 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1188 break; 1189 case MAT_ROWS_SORTED: 1190 case MAT_ROWS_UNSORTED: 1191 case MAT_YES_NEW_DIAGONALS: 1192 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1193 break; 1194 case MAT_COLUMN_ORIENTED: 1195 a->roworiented = PETSC_FALSE; 1196 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1197 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1198 break; 1199 case MAT_IGNORE_OFF_PROC_ENTRIES: 1200 a->donotstash = PETSC_TRUE; 1201 break; 1202 case MAT_NO_NEW_DIAGONALS: 1203 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1204 case MAT_SYMMETRIC: 1205 case MAT_STRUCTURALLY_SYMMETRIC: 1206 case MAT_HERMITIAN: 1207 case MAT_SYMMETRY_ETERNAL: 1208 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1209 break; 1210 case MAT_NOT_SYMMETRIC: 1211 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1212 case MAT_NOT_HERMITIAN: 1213 case MAT_NOT_SYMMETRY_ETERNAL: 1214 break; 1215 default: 1216 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1217 } 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatGetRow_MPIAIJ" 1223 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1224 { 1225 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1226 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1227 PetscErrorCode ierr; 1228 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1229 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1230 PetscInt *cmap,*idx_p; 1231 1232 PetscFunctionBegin; 1233 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1234 mat->getrowactive = PETSC_TRUE; 1235 1236 if (!mat->rowvalues && (idx || v)) { 1237 /* 1238 allocate enough space to hold information from the longest row. 1239 */ 1240 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1241 PetscInt max = 1,tmp; 1242 for (i=0; i<matin->m; i++) { 1243 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1244 if (max < tmp) { max = tmp; } 1245 } 1246 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1247 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1248 } 1249 1250 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1251 lrow = row - rstart; 1252 1253 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1254 if (!v) {pvA = 0; pvB = 0;} 1255 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1256 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1257 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1258 nztot = nzA + nzB; 1259 1260 cmap = mat->garray; 1261 if (v || idx) { 1262 if (nztot) { 1263 /* Sort by increasing column numbers, assuming A and B already sorted */ 1264 PetscInt imark = -1; 1265 if (v) { 1266 *v = v_p = mat->rowvalues; 1267 for (i=0; i<nzB; i++) { 1268 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1269 else break; 1270 } 1271 imark = i; 1272 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1273 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1274 } 1275 if (idx) { 1276 *idx = idx_p = mat->rowindices; 1277 if (imark > -1) { 1278 for (i=0; i<imark; i++) { 1279 idx_p[i] = cmap[cworkB[i]]; 1280 } 1281 } else { 1282 for (i=0; i<nzB; i++) { 1283 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1284 else break; 1285 } 1286 imark = i; 1287 } 1288 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1289 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1290 } 1291 } else { 1292 if (idx) *idx = 0; 1293 if (v) *v = 0; 1294 } 1295 } 1296 *nz = nztot; 1297 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1298 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1304 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1305 { 1306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1307 1308 PetscFunctionBegin; 1309 if (!aij->getrowactive) { 1310 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1311 } 1312 aij->getrowactive = PETSC_FALSE; 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatNorm_MPIAIJ" 1318 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1319 { 1320 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1321 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1322 PetscErrorCode ierr; 1323 PetscInt i,j,cstart = aij->cstart; 1324 PetscReal sum = 0.0; 1325 PetscScalar *v; 1326 1327 PetscFunctionBegin; 1328 if (aij->size == 1) { 1329 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1330 } else { 1331 if (type == NORM_FROBENIUS) { 1332 v = amat->a; 1333 for (i=0; i<amat->nz; i++) { 1334 #if defined(PETSC_USE_COMPLEX) 1335 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1336 #else 1337 sum += (*v)*(*v); v++; 1338 #endif 1339 } 1340 v = bmat->a; 1341 for (i=0; i<bmat->nz; i++) { 1342 #if defined(PETSC_USE_COMPLEX) 1343 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1344 #else 1345 sum += (*v)*(*v); v++; 1346 #endif 1347 } 1348 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1349 *norm = sqrt(*norm); 1350 } else if (type == NORM_1) { /* max column norm */ 1351 PetscReal *tmp,*tmp2; 1352 PetscInt *jj,*garray = aij->garray; 1353 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1354 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1355 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1356 *norm = 0.0; 1357 v = amat->a; jj = amat->j; 1358 for (j=0; j<amat->nz; j++) { 1359 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1360 } 1361 v = bmat->a; jj = bmat->j; 1362 for (j=0; j<bmat->nz; j++) { 1363 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1364 } 1365 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1366 for (j=0; j<mat->N; j++) { 1367 if (tmp2[j] > *norm) *norm = tmp2[j]; 1368 } 1369 ierr = PetscFree(tmp);CHKERRQ(ierr); 1370 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1371 } else if (type == NORM_INFINITY) { /* max row norm */ 1372 PetscReal ntemp = 0.0; 1373 for (j=0; j<aij->A->m; j++) { 1374 v = amat->a + amat->i[j]; 1375 sum = 0.0; 1376 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1377 sum += PetscAbsScalar(*v); v++; 1378 } 1379 v = bmat->a + bmat->i[j]; 1380 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1381 sum += PetscAbsScalar(*v); v++; 1382 } 1383 if (sum > ntemp) ntemp = sum; 1384 } 1385 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1386 } else { 1387 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1388 } 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatTranspose_MPIAIJ" 1395 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1396 { 1397 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1398 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1399 PetscErrorCode ierr; 1400 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1401 Mat B; 1402 PetscScalar *array; 1403 1404 PetscFunctionBegin; 1405 if (!matout && M != N) { 1406 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1407 } 1408 1409 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1410 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1411 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1412 1413 /* copy over the A part */ 1414 Aloc = (Mat_SeqAIJ*)a->A->data; 1415 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1416 row = a->rstart; 1417 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1418 for (i=0; i<m; i++) { 1419 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1420 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1421 } 1422 aj = Aloc->j; 1423 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1424 1425 /* copy over the B part */ 1426 Aloc = (Mat_SeqAIJ*)a->B->data; 1427 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1428 row = a->rstart; 1429 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1430 ct = cols; 1431 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1432 for (i=0; i<m; i++) { 1433 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1434 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1435 } 1436 ierr = PetscFree(ct);CHKERRQ(ierr); 1437 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1438 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1439 if (matout) { 1440 *matout = B; 1441 } else { 1442 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1449 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1450 { 1451 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1452 Mat a = aij->A,b = aij->B; 1453 PetscErrorCode ierr; 1454 PetscInt s1,s2,s3; 1455 1456 PetscFunctionBegin; 1457 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1458 if (rr) { 1459 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1460 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1461 /* Overlap communication with computation. */ 1462 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1463 } 1464 if (ll) { 1465 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1466 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1467 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1468 } 1469 /* scale the diagonal block */ 1470 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1471 1472 if (rr) { 1473 /* Do a scatter end and then right scale the off-diagonal block */ 1474 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1475 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1476 } 1477 1478 PetscFunctionReturn(0); 1479 } 1480 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1484 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1485 { 1486 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1487 PetscErrorCode ierr; 1488 1489 PetscFunctionBegin; 1490 if (!a->rank) { 1491 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1498 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1499 { 1500 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1501 PetscErrorCode ierr; 1502 1503 PetscFunctionBegin; 1504 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1505 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1510 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1511 { 1512 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1513 PetscErrorCode ierr; 1514 1515 PetscFunctionBegin; 1516 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatEqual_MPIAIJ" 1522 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1523 { 1524 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1525 Mat a,b,c,d; 1526 PetscTruth flg; 1527 PetscErrorCode ierr; 1528 1529 PetscFunctionBegin; 1530 a = matA->A; b = matA->B; 1531 c = matB->A; d = matB->B; 1532 1533 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1534 if (flg) { 1535 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1536 } 1537 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatCopy_MPIAIJ" 1543 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1544 { 1545 PetscErrorCode ierr; 1546 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1547 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1548 1549 PetscFunctionBegin; 1550 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1551 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1552 /* because of the column compression in the off-processor part of the matrix a->B, 1553 the number of columns in a->B and b->B may be different, hence we cannot call 1554 the MatCopy() directly on the two parts. If need be, we can provide a more 1555 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1556 then copying the submatrices */ 1557 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1558 } else { 1559 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1560 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1567 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1568 { 1569 PetscErrorCode ierr; 1570 1571 PetscFunctionBegin; 1572 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #include "petscblaslapack.h" 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatAXPY_MPIAIJ" 1579 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1580 { 1581 PetscErrorCode ierr; 1582 PetscInt i; 1583 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1584 PetscBLASInt bnz,one=1; 1585 Mat_SeqAIJ *x,*y; 1586 1587 PetscFunctionBegin; 1588 if (str == SAME_NONZERO_PATTERN) { 1589 x = (Mat_SeqAIJ *)xx->A->data; 1590 y = (Mat_SeqAIJ *)yy->A->data; 1591 bnz = (PetscBLASInt)x->nz; 1592 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1593 x = (Mat_SeqAIJ *)xx->B->data; 1594 y = (Mat_SeqAIJ *)yy->B->data; 1595 bnz = (PetscBLASInt)x->nz; 1596 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1597 } else if (str == SUBSET_NONZERO_PATTERN) { 1598 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1599 1600 x = (Mat_SeqAIJ *)xx->B->data; 1601 y = (Mat_SeqAIJ *)yy->B->data; 1602 if (y->xtoy && y->XtoY != xx->B) { 1603 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1604 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1605 } 1606 if (!y->xtoy) { /* get xtoy */ 1607 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1608 y->XtoY = xx->B; 1609 } 1610 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1611 } else { 1612 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1613 } 1614 PetscFunctionReturn(0); 1615 } 1616 1617 /* -------------------------------------------------------------------*/ 1618 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1619 MatGetRow_MPIAIJ, 1620 MatRestoreRow_MPIAIJ, 1621 MatMult_MPIAIJ, 1622 /* 4*/ MatMultAdd_MPIAIJ, 1623 MatMultTranspose_MPIAIJ, 1624 MatMultTransposeAdd_MPIAIJ, 1625 0, 1626 0, 1627 0, 1628 /*10*/ 0, 1629 0, 1630 0, 1631 MatRelax_MPIAIJ, 1632 MatTranspose_MPIAIJ, 1633 /*15*/ MatGetInfo_MPIAIJ, 1634 MatEqual_MPIAIJ, 1635 MatGetDiagonal_MPIAIJ, 1636 MatDiagonalScale_MPIAIJ, 1637 MatNorm_MPIAIJ, 1638 /*20*/ MatAssemblyBegin_MPIAIJ, 1639 MatAssemblyEnd_MPIAIJ, 1640 0, 1641 MatSetOption_MPIAIJ, 1642 MatZeroEntries_MPIAIJ, 1643 /*25*/ MatZeroRows_MPIAIJ, 1644 0, 1645 0, 1646 0, 1647 0, 1648 /*30*/ MatSetUpPreallocation_MPIAIJ, 1649 0, 1650 0, 1651 0, 1652 0, 1653 /*35*/ MatDuplicate_MPIAIJ, 1654 0, 1655 0, 1656 0, 1657 0, 1658 /*40*/ MatAXPY_MPIAIJ, 1659 MatGetSubMatrices_MPIAIJ, 1660 MatIncreaseOverlap_MPIAIJ, 1661 MatGetValues_MPIAIJ, 1662 MatCopy_MPIAIJ, 1663 /*45*/ MatPrintHelp_MPIAIJ, 1664 MatScale_MPIAIJ, 1665 0, 1666 0, 1667 0, 1668 /*50*/ MatSetBlockSize_MPIAIJ, 1669 0, 1670 0, 1671 0, 1672 0, 1673 /*55*/ MatFDColoringCreate_MPIAIJ, 1674 0, 1675 MatSetUnfactored_MPIAIJ, 1676 0, 1677 0, 1678 /*60*/ MatGetSubMatrix_MPIAIJ, 1679 MatDestroy_MPIAIJ, 1680 MatView_MPIAIJ, 1681 MatGetPetscMaps_Petsc, 1682 0, 1683 /*65*/ 0, 1684 0, 1685 0, 1686 0, 1687 0, 1688 /*70*/ 0, 1689 0, 1690 MatSetColoring_MPIAIJ, 1691 #if defined(PETSC_HAVE_ADIC) 1692 MatSetValuesAdic_MPIAIJ, 1693 #else 1694 0, 1695 #endif 1696 MatSetValuesAdifor_MPIAIJ, 1697 /*75*/ 0, 1698 0, 1699 0, 1700 0, 1701 0, 1702 /*80*/ 0, 1703 0, 1704 0, 1705 0, 1706 /*84*/ MatLoad_MPIAIJ, 1707 0, 1708 0, 1709 0, 1710 0, 1711 0, 1712 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1713 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1714 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1715 MatPtAP_MPIAIJ_MPIAIJ, 1716 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1717 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1718 0, 1719 0, 1720 0}; 1721 1722 /* ----------------------------------------------------------------------------------------*/ 1723 1724 EXTERN_C_BEGIN 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1727 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1728 { 1729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1734 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 EXTERN_C_END 1738 1739 EXTERN_C_BEGIN 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1742 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1743 { 1744 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1745 PetscErrorCode ierr; 1746 1747 PetscFunctionBegin; 1748 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1749 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1750 PetscFunctionReturn(0); 1751 } 1752 EXTERN_C_END 1753 1754 #include "petscpc.h" 1755 EXTERN_C_BEGIN 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1758 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1759 { 1760 Mat_MPIAIJ *b; 1761 PetscErrorCode ierr; 1762 PetscInt i; 1763 1764 PetscFunctionBegin; 1765 B->preallocated = PETSC_TRUE; 1766 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1767 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1768 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1769 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1770 if (d_nnz) { 1771 for (i=0; i<B->m; i++) { 1772 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1773 } 1774 } 1775 if (o_nnz) { 1776 for (i=0; i<B->m; i++) { 1777 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1778 } 1779 } 1780 b = (Mat_MPIAIJ*)B->data; 1781 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1782 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1783 1784 PetscFunctionReturn(0); 1785 } 1786 EXTERN_C_END 1787 1788 /*MC 1789 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1790 1791 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1792 and MATMPIAIJ otherwise. As a result, for single process communicators, 1793 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1794 for communicators controlling multiple processes. It is recommended that you call both of 1795 the above preallocation routines for simplicity. 1796 1797 Options Database Keys: 1798 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1799 1800 Level: beginner 1801 1802 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1803 M*/ 1804 1805 EXTERN_C_BEGIN 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "MatCreate_AIJ" 1808 PetscErrorCode MatCreate_AIJ(Mat A) 1809 { 1810 PetscErrorCode ierr; 1811 PetscMPIInt size; 1812 1813 PetscFunctionBegin; 1814 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1815 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1816 if (size == 1) { 1817 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1818 } else { 1819 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1820 } 1821 PetscFunctionReturn(0); 1822 } 1823 EXTERN_C_END 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1827 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1828 { 1829 Mat mat; 1830 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1831 PetscErrorCode ierr; 1832 1833 PetscFunctionBegin; 1834 *newmat = 0; 1835 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1836 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1837 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1838 a = (Mat_MPIAIJ*)mat->data; 1839 1840 mat->factor = matin->factor; 1841 mat->bs = matin->bs; 1842 mat->assembled = PETSC_TRUE; 1843 mat->insertmode = NOT_SET_VALUES; 1844 mat->preallocated = PETSC_TRUE; 1845 1846 a->rstart = oldmat->rstart; 1847 a->rend = oldmat->rend; 1848 a->cstart = oldmat->cstart; 1849 a->cend = oldmat->cend; 1850 a->size = oldmat->size; 1851 a->rank = oldmat->rank; 1852 a->donotstash = oldmat->donotstash; 1853 a->roworiented = oldmat->roworiented; 1854 a->rowindices = 0; 1855 a->rowvalues = 0; 1856 a->getrowactive = PETSC_FALSE; 1857 1858 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1859 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1860 if (oldmat->colmap) { 1861 #if defined (PETSC_USE_CTABLE) 1862 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1863 #else 1864 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1865 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1866 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1867 #endif 1868 } else a->colmap = 0; 1869 if (oldmat->garray) { 1870 PetscInt len; 1871 len = oldmat->B->n; 1872 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1873 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1874 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1875 } else a->garray = 0; 1876 1877 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1878 PetscLogObjectParent(mat,a->lvec); 1879 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1880 PetscLogObjectParent(mat,a->Mvctx); 1881 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1882 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1883 PetscLogObjectParent(mat,a->A); 1884 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1885 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1886 PetscLogObjectParent(mat,a->B); 1887 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1888 *newmat = mat; 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #include "petscsys.h" 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatLoad_MPIAIJ" 1896 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1897 { 1898 Mat A; 1899 PetscScalar *vals,*svals; 1900 MPI_Comm comm = ((PetscObject)viewer)->comm; 1901 MPI_Status status; 1902 PetscErrorCode ierr; 1903 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1904 PetscInt i,nz,j,rstart,rend; 1905 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1906 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1907 PetscInt cend,cstart,n,*rowners; 1908 int fd; 1909 1910 PetscFunctionBegin; 1911 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1912 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1913 if (!rank) { 1914 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1915 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1916 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1917 } 1918 1919 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1920 M = header[1]; N = header[2]; 1921 /* determine ownership of all rows */ 1922 m = M/size + ((M % size) > rank); 1923 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1924 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1925 rowners[0] = 0; 1926 for (i=2; i<=size; i++) { 1927 rowners[i] += rowners[i-1]; 1928 } 1929 rstart = rowners[rank]; 1930 rend = rowners[rank+1]; 1931 1932 /* distribute row lengths to all processors */ 1933 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1934 if (!rank) { 1935 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1936 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1937 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1938 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1939 for (j=0; j<m; j++) { 1940 procsnz[0] += ourlens[j]; 1941 } 1942 for (i=1; i<size; i++) { 1943 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1944 /* calculate the number of nonzeros on each processor */ 1945 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1946 procsnz[i] += rowlengths[j]; 1947 } 1948 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1949 } 1950 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1951 } else { 1952 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1953 } 1954 1955 if (!rank) { 1956 /* determine max buffer needed and allocate it */ 1957 maxnz = 0; 1958 for (i=0; i<size; i++) { 1959 maxnz = PetscMax(maxnz,procsnz[i]); 1960 } 1961 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1962 1963 /* read in my part of the matrix column indices */ 1964 nz = procsnz[0]; 1965 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1966 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1967 1968 /* read in every one elses and ship off */ 1969 for (i=1; i<size; i++) { 1970 nz = procsnz[i]; 1971 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1972 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1973 } 1974 ierr = PetscFree(cols);CHKERRQ(ierr); 1975 } else { 1976 /* determine buffer space needed for message */ 1977 nz = 0; 1978 for (i=0; i<m; i++) { 1979 nz += ourlens[i]; 1980 } 1981 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1982 1983 /* receive message of column indices*/ 1984 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1985 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1986 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1987 } 1988 1989 /* determine column ownership if matrix is not square */ 1990 if (N != M) { 1991 n = N/size + ((N % size) > rank); 1992 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1993 cstart = cend - n; 1994 } else { 1995 cstart = rstart; 1996 cend = rend; 1997 n = cend - cstart; 1998 } 1999 2000 /* loop over local rows, determining number of off diagonal entries */ 2001 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2002 jj = 0; 2003 for (i=0; i<m; i++) { 2004 for (j=0; j<ourlens[i]; j++) { 2005 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2006 jj++; 2007 } 2008 } 2009 2010 /* create our matrix */ 2011 for (i=0; i<m; i++) { 2012 ourlens[i] -= offlens[i]; 2013 } 2014 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2015 ierr = MatSetType(A,type);CHKERRQ(ierr); 2016 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2017 2018 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2019 for (i=0; i<m; i++) { 2020 ourlens[i] += offlens[i]; 2021 } 2022 2023 if (!rank) { 2024 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2025 2026 /* read in my part of the matrix numerical values */ 2027 nz = procsnz[0]; 2028 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2029 2030 /* insert into matrix */ 2031 jj = rstart; 2032 smycols = mycols; 2033 svals = vals; 2034 for (i=0; i<m; i++) { 2035 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2036 smycols += ourlens[i]; 2037 svals += ourlens[i]; 2038 jj++; 2039 } 2040 2041 /* read in other processors and ship out */ 2042 for (i=1; i<size; i++) { 2043 nz = procsnz[i]; 2044 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2045 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2046 } 2047 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2048 } else { 2049 /* receive numeric values */ 2050 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2051 2052 /* receive message of values*/ 2053 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2054 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2055 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2056 2057 /* insert into matrix */ 2058 jj = rstart; 2059 smycols = mycols; 2060 svals = vals; 2061 for (i=0; i<m; i++) { 2062 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2063 smycols += ourlens[i]; 2064 svals += ourlens[i]; 2065 jj++; 2066 } 2067 } 2068 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2069 ierr = PetscFree(vals);CHKERRQ(ierr); 2070 ierr = PetscFree(mycols);CHKERRQ(ierr); 2071 ierr = PetscFree(rowners);CHKERRQ(ierr); 2072 2073 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2074 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2075 *newmat = A; 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2081 /* 2082 Not great since it makes two copies of the submatrix, first an SeqAIJ 2083 in local and then by concatenating the local matrices the end result. 2084 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2085 */ 2086 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2087 { 2088 PetscErrorCode ierr; 2089 PetscMPIInt rank,size; 2090 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2091 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2092 Mat *local,M,Mreuse; 2093 PetscScalar *vwork,*aa; 2094 MPI_Comm comm = mat->comm; 2095 Mat_SeqAIJ *aij; 2096 2097 2098 PetscFunctionBegin; 2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 2102 if (call == MAT_REUSE_MATRIX) { 2103 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2104 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2105 local = &Mreuse; 2106 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2107 } else { 2108 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2109 Mreuse = *local; 2110 ierr = PetscFree(local);CHKERRQ(ierr); 2111 } 2112 2113 /* 2114 m - number of local rows 2115 n - number of columns (same on all processors) 2116 rstart - first row in new global matrix generated 2117 */ 2118 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2119 if (call == MAT_INITIAL_MATRIX) { 2120 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2121 ii = aij->i; 2122 jj = aij->j; 2123 2124 /* 2125 Determine the number of non-zeros in the diagonal and off-diagonal 2126 portions of the matrix in order to do correct preallocation 2127 */ 2128 2129 /* first get start and end of "diagonal" columns */ 2130 if (csize == PETSC_DECIDE) { 2131 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2132 if (mglobal == n) { /* square matrix */ 2133 nlocal = m; 2134 } else { 2135 nlocal = n/size + ((n % size) > rank); 2136 } 2137 } else { 2138 nlocal = csize; 2139 } 2140 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2141 rstart = rend - nlocal; 2142 if (rank == size - 1 && rend != n) { 2143 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2144 } 2145 2146 /* next, compute all the lengths */ 2147 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2148 olens = dlens + m; 2149 for (i=0; i<m; i++) { 2150 jend = ii[i+1] - ii[i]; 2151 olen = 0; 2152 dlen = 0; 2153 for (j=0; j<jend; j++) { 2154 if (*jj < rstart || *jj >= rend) olen++; 2155 else dlen++; 2156 jj++; 2157 } 2158 olens[i] = olen; 2159 dlens[i] = dlen; 2160 } 2161 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2162 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2163 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2164 ierr = PetscFree(dlens);CHKERRQ(ierr); 2165 } else { 2166 PetscInt ml,nl; 2167 2168 M = *newmat; 2169 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2170 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2171 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2172 /* 2173 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2174 rather than the slower MatSetValues(). 2175 */ 2176 M->was_assembled = PETSC_TRUE; 2177 M->assembled = PETSC_FALSE; 2178 } 2179 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2180 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2181 ii = aij->i; 2182 jj = aij->j; 2183 aa = aij->a; 2184 for (i=0; i<m; i++) { 2185 row = rstart + i; 2186 nz = ii[i+1] - ii[i]; 2187 cwork = jj; jj += nz; 2188 vwork = aa; aa += nz; 2189 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2190 } 2191 2192 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2193 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2194 *newmat = M; 2195 2196 /* save submatrix used in processor for next request */ 2197 if (call == MAT_INITIAL_MATRIX) { 2198 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2199 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2200 } 2201 2202 PetscFunctionReturn(0); 2203 } 2204 2205 EXTERN_C_BEGIN 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2208 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2209 { 2210 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2211 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2212 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2213 const PetscInt *JJ; 2214 PetscScalar *values; 2215 PetscErrorCode ierr; 2216 2217 PetscFunctionBegin; 2218 #if defined(PETSC_OPT_g) 2219 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2220 #endif 2221 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2222 o_nnz = d_nnz + m; 2223 2224 for (i=0; i<m; i++) { 2225 nnz = I[i+1]- I[i]; 2226 JJ = J + I[i]; 2227 nnz_max = PetscMax(nnz_max,nnz); 2228 #if defined(PETSC_OPT_g) 2229 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2230 #endif 2231 for (j=0; j<nnz; j++) { 2232 if (*JJ >= cstart) break; 2233 JJ++; 2234 } 2235 d = 0; 2236 for (; j<nnz; j++) { 2237 if (*JJ++ >= cend) break; 2238 d++; 2239 } 2240 d_nnz[i] = d; 2241 o_nnz[i] = nnz - d; 2242 } 2243 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2244 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2245 2246 if (v) values = (PetscScalar*)v; 2247 else { 2248 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2249 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2250 } 2251 2252 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2253 for (i=0; i<m; i++) { 2254 ii = i + rstart; 2255 nnz = I[i+1]- I[i]; 2256 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2257 } 2258 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2259 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2260 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2261 2262 if (!v) { 2263 ierr = PetscFree(values);CHKERRQ(ierr); 2264 } 2265 PetscFunctionReturn(0); 2266 } 2267 EXTERN_C_END 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2271 /*@C 2272 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2273 (the default parallel PETSc format). 2274 2275 Collective on MPI_Comm 2276 2277 Input Parameters: 2278 + A - the matrix 2279 . i - the indices into j for the start of each local row (starts with zero) 2280 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2281 - v - optional values in the matrix 2282 2283 Level: developer 2284 2285 .keywords: matrix, aij, compressed row, sparse, parallel 2286 2287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2288 @*/ 2289 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2290 { 2291 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2292 2293 PetscFunctionBegin; 2294 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2295 if (f) { 2296 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2297 } 2298 PetscFunctionReturn(0); 2299 } 2300 2301 #undef __FUNCT__ 2302 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2303 /*@C 2304 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2305 (the default parallel PETSc format). For good matrix assembly performance 2306 the user should preallocate the matrix storage by setting the parameters 2307 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2308 performance can be increased by more than a factor of 50. 2309 2310 Collective on MPI_Comm 2311 2312 Input Parameters: 2313 + A - the matrix 2314 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2315 (same value is used for all local rows) 2316 . d_nnz - array containing the number of nonzeros in the various rows of the 2317 DIAGONAL portion of the local submatrix (possibly different for each row) 2318 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2319 The size of this array is equal to the number of local rows, i.e 'm'. 2320 You must leave room for the diagonal entry even if it is zero. 2321 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2322 submatrix (same value is used for all local rows). 2323 - o_nnz - array containing the number of nonzeros in the various rows of the 2324 OFF-DIAGONAL portion of the local submatrix (possibly different for 2325 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2326 structure. The size of this array is equal to the number 2327 of local rows, i.e 'm'. 2328 2329 If the *_nnz parameter is given then the *_nz parameter is ignored 2330 2331 The AIJ format (also called the Yale sparse matrix format or 2332 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2333 storage. The stored row and column indices begin with zero. See the users manual for details. 2334 2335 The parallel matrix is partitioned such that the first m0 rows belong to 2336 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2337 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2338 2339 The DIAGONAL portion of the local submatrix of a processor can be defined 2340 as the submatrix which is obtained by extraction the part corresponding 2341 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2342 first row that belongs to the processor, and r2 is the last row belonging 2343 to the this processor. This is a square mxm matrix. The remaining portion 2344 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2345 2346 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2347 2348 Example usage: 2349 2350 Consider the following 8x8 matrix with 34 non-zero values, that is 2351 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2352 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2353 as follows: 2354 2355 .vb 2356 1 2 0 | 0 3 0 | 0 4 2357 Proc0 0 5 6 | 7 0 0 | 8 0 2358 9 0 10 | 11 0 0 | 12 0 2359 ------------------------------------- 2360 13 0 14 | 15 16 17 | 0 0 2361 Proc1 0 18 0 | 19 20 21 | 0 0 2362 0 0 0 | 22 23 0 | 24 0 2363 ------------------------------------- 2364 Proc2 25 26 27 | 0 0 28 | 29 0 2365 30 0 0 | 31 32 33 | 0 34 2366 .ve 2367 2368 This can be represented as a collection of submatrices as: 2369 2370 .vb 2371 A B C 2372 D E F 2373 G H I 2374 .ve 2375 2376 Where the submatrices A,B,C are owned by proc0, D,E,F are 2377 owned by proc1, G,H,I are owned by proc2. 2378 2379 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2380 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2381 The 'M','N' parameters are 8,8, and have the same values on all procs. 2382 2383 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2384 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2385 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2386 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2387 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2388 matrix, ans [DF] as another SeqAIJ matrix. 2389 2390 When d_nz, o_nz parameters are specified, d_nz storage elements are 2391 allocated for every row of the local diagonal submatrix, and o_nz 2392 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2393 One way to choose d_nz and o_nz is to use the max nonzerors per local 2394 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2395 In this case, the values of d_nz,o_nz are: 2396 .vb 2397 proc0 : dnz = 2, o_nz = 2 2398 proc1 : dnz = 3, o_nz = 2 2399 proc2 : dnz = 1, o_nz = 4 2400 .ve 2401 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2402 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2403 for proc3. i.e we are using 12+15+10=37 storage locations to store 2404 34 values. 2405 2406 When d_nnz, o_nnz parameters are specified, the storage is specified 2407 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2408 In the above case the values for d_nnz,o_nnz are: 2409 .vb 2410 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2411 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2412 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2413 .ve 2414 Here the space allocated is sum of all the above values i.e 34, and 2415 hence pre-allocation is perfect. 2416 2417 Level: intermediate 2418 2419 .keywords: matrix, aij, compressed row, sparse, parallel 2420 2421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2422 MPIAIJ 2423 @*/ 2424 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2425 { 2426 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2427 2428 PetscFunctionBegin; 2429 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2430 if (f) { 2431 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2432 } 2433 PetscFunctionReturn(0); 2434 } 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "MatCreateMPIAIJ" 2438 /*@C 2439 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2440 (the default parallel PETSc format). For good matrix assembly performance 2441 the user should preallocate the matrix storage by setting the parameters 2442 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2443 performance can be increased by more than a factor of 50. 2444 2445 Collective on MPI_Comm 2446 2447 Input Parameters: 2448 + comm - MPI communicator 2449 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2450 This value should be the same as the local size used in creating the 2451 y vector for the matrix-vector product y = Ax. 2452 . n - This value should be the same as the local size used in creating the 2453 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2454 calculated if N is given) For square matrices n is almost always m. 2455 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2456 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2457 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2458 (same value is used for all local rows) 2459 . d_nnz - array containing the number of nonzeros in the various rows of the 2460 DIAGONAL portion of the local submatrix (possibly different for each row) 2461 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2462 The size of this array is equal to the number of local rows, i.e 'm'. 2463 You must leave room for the diagonal entry even if it is zero. 2464 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2465 submatrix (same value is used for all local rows). 2466 - o_nnz - array containing the number of nonzeros in the various rows of the 2467 OFF-DIAGONAL portion of the local submatrix (possibly different for 2468 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2469 structure. The size of this array is equal to the number 2470 of local rows, i.e 'm'. 2471 2472 Output Parameter: 2473 . A - the matrix 2474 2475 Notes: 2476 If the *_nnz parameter is given then the *_nz parameter is ignored 2477 2478 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2479 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2480 storage requirements for this matrix. 2481 2482 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2483 processor than it must be used on all processors that share the object for 2484 that argument. 2485 2486 The user MUST specify either the local or global matrix dimensions 2487 (possibly both). 2488 2489 The parallel matrix is partitioned such that the first m0 rows belong to 2490 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2491 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2492 2493 The DIAGONAL portion of the local submatrix of a processor can be defined 2494 as the submatrix which is obtained by extraction the part corresponding 2495 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2496 first row that belongs to the processor, and r2 is the last row belonging 2497 to the this processor. This is a square mxm matrix. The remaining portion 2498 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2499 2500 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2501 2502 When calling this routine with a single process communicator, a matrix of 2503 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2504 type of communicator, use the construction mechanism: 2505 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2506 2507 By default, this format uses inodes (identical nodes) when possible. 2508 We search for consecutive rows with the same nonzero structure, thereby 2509 reusing matrix information to achieve increased efficiency. 2510 2511 Options Database Keys: 2512 + -mat_aij_no_inode - Do not use inodes 2513 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2514 - -mat_aij_oneindex - Internally use indexing starting at 1 2515 rather than 0. Note that when calling MatSetValues(), 2516 the user still MUST index entries starting at 0! 2517 2518 2519 Example usage: 2520 2521 Consider the following 8x8 matrix with 34 non-zero values, that is 2522 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2523 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2524 as follows: 2525 2526 .vb 2527 1 2 0 | 0 3 0 | 0 4 2528 Proc0 0 5 6 | 7 0 0 | 8 0 2529 9 0 10 | 11 0 0 | 12 0 2530 ------------------------------------- 2531 13 0 14 | 15 16 17 | 0 0 2532 Proc1 0 18 0 | 19 20 21 | 0 0 2533 0 0 0 | 22 23 0 | 24 0 2534 ------------------------------------- 2535 Proc2 25 26 27 | 0 0 28 | 29 0 2536 30 0 0 | 31 32 33 | 0 34 2537 .ve 2538 2539 This can be represented as a collection of submatrices as: 2540 2541 .vb 2542 A B C 2543 D E F 2544 G H I 2545 .ve 2546 2547 Where the submatrices A,B,C are owned by proc0, D,E,F are 2548 owned by proc1, G,H,I are owned by proc2. 2549 2550 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2551 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2552 The 'M','N' parameters are 8,8, and have the same values on all procs. 2553 2554 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2555 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2556 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2557 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2558 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2559 matrix, ans [DF] as another SeqAIJ matrix. 2560 2561 When d_nz, o_nz parameters are specified, d_nz storage elements are 2562 allocated for every row of the local diagonal submatrix, and o_nz 2563 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2564 One way to choose d_nz and o_nz is to use the max nonzerors per local 2565 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2566 In this case, the values of d_nz,o_nz are: 2567 .vb 2568 proc0 : dnz = 2, o_nz = 2 2569 proc1 : dnz = 3, o_nz = 2 2570 proc2 : dnz = 1, o_nz = 4 2571 .ve 2572 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2573 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2574 for proc3. i.e we are using 12+15+10=37 storage locations to store 2575 34 values. 2576 2577 When d_nnz, o_nnz parameters are specified, the storage is specified 2578 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2579 In the above case the values for d_nnz,o_nnz are: 2580 .vb 2581 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2582 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2583 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2584 .ve 2585 Here the space allocated is sum of all the above values i.e 34, and 2586 hence pre-allocation is perfect. 2587 2588 Level: intermediate 2589 2590 .keywords: matrix, aij, compressed row, sparse, parallel 2591 2592 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2593 MPIAIJ 2594 @*/ 2595 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2596 { 2597 PetscErrorCode ierr; 2598 PetscMPIInt size; 2599 2600 PetscFunctionBegin; 2601 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2602 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2603 if (size > 1) { 2604 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2605 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2606 } else { 2607 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2608 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2609 } 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2615 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2616 { 2617 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2618 2619 PetscFunctionBegin; 2620 *Ad = a->A; 2621 *Ao = a->B; 2622 *colmap = a->garray; 2623 PetscFunctionReturn(0); 2624 } 2625 2626 #undef __FUNCT__ 2627 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2628 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2629 { 2630 PetscErrorCode ierr; 2631 PetscInt i; 2632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2633 2634 PetscFunctionBegin; 2635 if (coloring->ctype == IS_COLORING_LOCAL) { 2636 ISColoringValue *allcolors,*colors; 2637 ISColoring ocoloring; 2638 2639 /* set coloring for diagonal portion */ 2640 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2641 2642 /* set coloring for off-diagonal portion */ 2643 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2644 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2645 for (i=0; i<a->B->n; i++) { 2646 colors[i] = allcolors[a->garray[i]]; 2647 } 2648 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2649 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2650 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2651 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2652 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2653 ISColoringValue *colors; 2654 PetscInt *larray; 2655 ISColoring ocoloring; 2656 2657 /* set coloring for diagonal portion */ 2658 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2659 for (i=0; i<a->A->n; i++) { 2660 larray[i] = i + a->cstart; 2661 } 2662 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2663 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2664 for (i=0; i<a->A->n; i++) { 2665 colors[i] = coloring->colors[larray[i]]; 2666 } 2667 ierr = PetscFree(larray);CHKERRQ(ierr); 2668 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2669 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2670 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2671 2672 /* set coloring for off-diagonal portion */ 2673 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2674 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2675 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2676 for (i=0; i<a->B->n; i++) { 2677 colors[i] = coloring->colors[larray[i]]; 2678 } 2679 ierr = PetscFree(larray);CHKERRQ(ierr); 2680 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2681 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2682 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2683 } else { 2684 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2685 } 2686 2687 PetscFunctionReturn(0); 2688 } 2689 2690 #if defined(PETSC_HAVE_ADIC) 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2693 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2694 { 2695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2696 PetscErrorCode ierr; 2697 2698 PetscFunctionBegin; 2699 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2700 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2701 PetscFunctionReturn(0); 2702 } 2703 #endif 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2707 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2708 { 2709 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2710 PetscErrorCode ierr; 2711 2712 PetscFunctionBegin; 2713 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2714 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatMerge" 2720 /*@C 2721 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2722 matrices from each processor 2723 2724 Collective on MPI_Comm 2725 2726 Input Parameters: 2727 + comm - the communicators the parallel matrix will live on 2728 . inmat - the input sequential matrices 2729 . n - number of local columns (or PETSC_DECIDE) 2730 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2731 2732 Output Parameter: 2733 . outmat - the parallel matrix generated 2734 2735 Level: advanced 2736 2737 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2738 2739 @*/ 2740 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2741 { 2742 PetscErrorCode ierr; 2743 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2744 PetscInt *indx; 2745 PetscScalar *values; 2746 PetscMap columnmap,rowmap; 2747 2748 PetscFunctionBegin; 2749 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2750 /* 2751 PetscMPIInt rank; 2752 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2753 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2754 */ 2755 if (scall == MAT_INITIAL_MATRIX){ 2756 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2757 if (n == PETSC_DECIDE){ 2758 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2759 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2760 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2761 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2762 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2763 } 2764 2765 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2766 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2767 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2768 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2769 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2770 2771 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2772 for (i=0;i<m;i++) { 2773 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2774 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2775 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2776 } 2777 /* This routine will ONLY return MPIAIJ type matrix */ 2778 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2779 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2780 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2781 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2782 2783 } else if (scall == MAT_REUSE_MATRIX){ 2784 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2785 } else { 2786 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2787 } 2788 2789 for (i=0;i<m;i++) { 2790 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2791 I = i + rstart; 2792 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2793 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2794 } 2795 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2796 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2797 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2798 2799 PetscFunctionReturn(0); 2800 } 2801 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatFileSplit" 2804 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2805 { 2806 PetscErrorCode ierr; 2807 PetscMPIInt rank; 2808 PetscInt m,N,i,rstart,nnz; 2809 size_t len; 2810 const PetscInt *indx; 2811 PetscViewer out; 2812 char *name; 2813 Mat B; 2814 const PetscScalar *values; 2815 2816 PetscFunctionBegin; 2817 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2818 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2819 /* Should this be the type of the diagonal block of A? */ 2820 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2821 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2822 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2823 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2824 for (i=0;i<m;i++) { 2825 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2826 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2827 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2828 } 2829 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2830 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2831 2832 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2833 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2834 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2835 sprintf(name,"%s.%d",outfile,rank); 2836 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2837 ierr = PetscFree(name); 2838 ierr = MatView(B,out);CHKERRQ(ierr); 2839 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2840 ierr = MatDestroy(B);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 2844 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2845 #undef __FUNCT__ 2846 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2847 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2848 { 2849 PetscErrorCode ierr; 2850 Mat_Merge_SeqsToMPI *merge; 2851 PetscObjectContainer container; 2852 2853 PetscFunctionBegin; 2854 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2855 if (container) { 2856 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2857 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2858 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2859 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2860 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2861 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2862 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2863 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2864 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2865 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2866 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2867 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2868 2869 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2870 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2871 } 2872 ierr = PetscFree(merge);CHKERRQ(ierr); 2873 2874 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2875 PetscFunctionReturn(0); 2876 } 2877 2878 #include "src/mat/utils/freespace.h" 2879 #include "petscbt.h" 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatMerge_SeqsToMPI" 2882 /*@C 2883 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2884 matrices from each processor 2885 2886 Collective on MPI_Comm 2887 2888 Input Parameters: 2889 + comm - the communicators the parallel matrix will live on 2890 . seqmat - the input sequential matrices 2891 . m - number of local rows (or PETSC_DECIDE) 2892 . n - number of local columns (or PETSC_DECIDE) 2893 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2894 2895 Output Parameter: 2896 . mpimat - the parallel matrix generated 2897 2898 Level: advanced 2899 2900 Notes: 2901 The dimensions of the sequential matrix in each processor MUST be the same. 2902 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2903 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2904 @*/ 2905 static PetscEvent logkey_seqstompinum = 0; 2906 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2907 { 2908 PetscErrorCode ierr; 2909 MPI_Comm comm=mpimat->comm; 2910 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2911 PetscMPIInt size,rank,taga,*len_s; 2912 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2913 PetscInt proc,m; 2914 PetscInt **buf_ri,**buf_rj; 2915 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2916 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2917 MPI_Request *s_waits,*r_waits; 2918 MPI_Status *status; 2919 MatScalar *aa=a->a,**abuf_r,*ba_i; 2920 Mat_Merge_SeqsToMPI *merge; 2921 PetscObjectContainer container; 2922 2923 PetscFunctionBegin; 2924 if (!logkey_seqstompinum) { 2925 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2926 } 2927 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2928 2929 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2930 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2931 2932 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2933 if (container) { 2934 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2935 } 2936 bi = merge->bi; 2937 bj = merge->bj; 2938 buf_ri = merge->buf_ri; 2939 buf_rj = merge->buf_rj; 2940 2941 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2942 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2943 len_s = merge->len_s; 2944 2945 /* send and recv matrix values */ 2946 /*-----------------------------*/ 2947 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2948 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2949 2950 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2951 for (proc=0,k=0; proc<size; proc++){ 2952 if (!len_s[proc]) continue; 2953 i = owners[proc]; 2954 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2955 k++; 2956 } 2957 2958 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2959 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2960 ierr = PetscFree(status);CHKERRQ(ierr); 2961 2962 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2963 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2964 2965 /* insert mat values of mpimat */ 2966 /*----------------------------*/ 2967 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2968 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2969 nextrow = buf_ri_k + merge->nrecv; 2970 nextai = nextrow + merge->nrecv; 2971 2972 for (k=0; k<merge->nrecv; k++){ 2973 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2974 nrows = *(buf_ri_k[k]); 2975 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2976 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2977 } 2978 2979 /* set values of ba */ 2980 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2981 for (i=0; i<m; i++) { 2982 arow = owners[rank] + i; 2983 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2984 bnzi = bi[i+1] - bi[i]; 2985 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2986 2987 /* add local non-zero vals of this proc's seqmat into ba */ 2988 anzi = ai[arow+1] - ai[arow]; 2989 aj = a->j + ai[arow]; 2990 aa = a->a + ai[arow]; 2991 nextaj = 0; 2992 for (j=0; nextaj<anzi; j++){ 2993 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2994 ba_i[j] += aa[nextaj++]; 2995 } 2996 } 2997 2998 /* add received vals into ba */ 2999 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3000 /* i-th row */ 3001 if (i == *nextrow[k]) { 3002 anzi = *(nextai[k]+1) - *nextai[k]; 3003 aj = buf_rj[k] + *(nextai[k]); 3004 aa = abuf_r[k] + *(nextai[k]); 3005 nextaj = 0; 3006 for (j=0; nextaj<anzi; j++){ 3007 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3008 ba_i[j] += aa[nextaj++]; 3009 } 3010 } 3011 nextrow[k]++; nextai[k]++; 3012 } 3013 } 3014 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3015 } 3016 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3017 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3018 3019 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3020 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3021 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3022 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3023 PetscFunctionReturn(0); 3024 } 3025 static PetscEvent logkey_seqstompisym = 0; 3026 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3027 { 3028 PetscErrorCode ierr; 3029 Mat B_mpi; 3030 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3031 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3032 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3033 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3034 PetscInt len,proc,*dnz,*onz; 3035 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3036 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3037 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3038 MPI_Status *status; 3039 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3040 PetscBT lnkbt; 3041 Mat_Merge_SeqsToMPI *merge; 3042 PetscObjectContainer container; 3043 3044 PetscFunctionBegin; 3045 if (!logkey_seqstompisym) { 3046 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3047 } 3048 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3049 3050 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3051 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3052 3053 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3054 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3055 3056 /* determine row ownership */ 3057 /*---------------------------------------------------------*/ 3058 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3059 if (m == PETSC_DECIDE) { 3060 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3061 } else { 3062 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3063 } 3064 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3065 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3066 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3067 3068 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3069 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3070 3071 /* determine the number of messages to send, their lengths */ 3072 /*---------------------------------------------------------*/ 3073 len_s = merge->len_s; 3074 3075 len = 0; /* length of buf_si[] */ 3076 merge->nsend = 0; 3077 for (proc=0; proc<size; proc++){ 3078 len_si[proc] = 0; 3079 if (proc == rank){ 3080 len_s[proc] = 0; 3081 } else { 3082 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3083 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3084 } 3085 if (len_s[proc]) { 3086 merge->nsend++; 3087 nrows = 0; 3088 for (i=owners[proc]; i<owners[proc+1]; i++){ 3089 if (ai[i+1] > ai[i]) nrows++; 3090 } 3091 len_si[proc] = 2*(nrows+1); 3092 len += len_si[proc]; 3093 } 3094 } 3095 3096 /* determine the number and length of messages to receive for ij-structure */ 3097 /*-------------------------------------------------------------------------*/ 3098 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3099 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3100 3101 /* post the Irecv of j-structure */ 3102 /*-------------------------------*/ 3103 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3104 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3105 3106 /* post the Isend of j-structure */ 3107 /*--------------------------------*/ 3108 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3109 sj_waits = si_waits + merge->nsend; 3110 3111 for (proc=0, k=0; proc<size; proc++){ 3112 if (!len_s[proc]) continue; 3113 i = owners[proc]; 3114 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3115 k++; 3116 } 3117 3118 /* receives and sends of j-structure are complete */ 3119 /*------------------------------------------------*/ 3120 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3121 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3122 3123 /* send and recv i-structure */ 3124 /*---------------------------*/ 3125 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3126 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3127 3128 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3129 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3130 for (proc=0,k=0; proc<size; proc++){ 3131 if (!len_s[proc]) continue; 3132 /* form outgoing message for i-structure: 3133 buf_si[0]: nrows to be sent 3134 [1:nrows]: row index (global) 3135 [nrows+1:2*nrows+1]: i-structure index 3136 */ 3137 /*-------------------------------------------*/ 3138 nrows = len_si[proc]/2 - 1; 3139 buf_si_i = buf_si + nrows+1; 3140 buf_si[0] = nrows; 3141 buf_si_i[0] = 0; 3142 nrows = 0; 3143 for (i=owners[proc]; i<owners[proc+1]; i++){ 3144 anzi = ai[i+1] - ai[i]; 3145 if (anzi) { 3146 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3147 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3148 nrows++; 3149 } 3150 } 3151 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3152 k++; 3153 buf_si += len_si[proc]; 3154 } 3155 3156 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3157 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3158 3159 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3160 for (i=0; i<merge->nrecv; i++){ 3161 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3162 } 3163 3164 ierr = PetscFree(len_si);CHKERRQ(ierr); 3165 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3166 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3167 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3168 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3169 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3170 ierr = PetscFree(status);CHKERRQ(ierr); 3171 3172 /* compute a local seq matrix in each processor */ 3173 /*----------------------------------------------*/ 3174 /* allocate bi array and free space for accumulating nonzero column info */ 3175 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3176 bi[0] = 0; 3177 3178 /* create and initialize a linked list */ 3179 nlnk = N+1; 3180 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3181 3182 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3183 len = 0; 3184 len = ai[owners[rank+1]] - ai[owners[rank]]; 3185 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3186 current_space = free_space; 3187 3188 /* determine symbolic info for each local row */ 3189 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3190 nextrow = buf_ri_k + merge->nrecv; 3191 nextai = nextrow + merge->nrecv; 3192 for (k=0; k<merge->nrecv; k++){ 3193 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3194 nrows = *buf_ri_k[k]; 3195 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3196 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3197 } 3198 3199 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3200 len = 0; 3201 for (i=0;i<m;i++) { 3202 bnzi = 0; 3203 /* add local non-zero cols of this proc's seqmat into lnk */ 3204 arow = owners[rank] + i; 3205 anzi = ai[arow+1] - ai[arow]; 3206 aj = a->j + ai[arow]; 3207 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3208 bnzi += nlnk; 3209 /* add received col data into lnk */ 3210 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3211 if (i == *nextrow[k]) { /* i-th row */ 3212 anzi = *(nextai[k]+1) - *nextai[k]; 3213 aj = buf_rj[k] + *nextai[k]; 3214 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3215 bnzi += nlnk; 3216 nextrow[k]++; nextai[k]++; 3217 } 3218 } 3219 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3220 3221 /* if free space is not available, make more free space */ 3222 if (current_space->local_remaining<bnzi) { 3223 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3224 nspacedouble++; 3225 } 3226 /* copy data into free space, then initialize lnk */ 3227 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3228 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3229 3230 current_space->array += bnzi; 3231 current_space->local_used += bnzi; 3232 current_space->local_remaining -= bnzi; 3233 3234 bi[i+1] = bi[i] + bnzi; 3235 } 3236 3237 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3238 3239 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3240 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3241 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3242 3243 /* create symbolic parallel matrix B_mpi */ 3244 /*---------------------------------------*/ 3245 if (n==PETSC_DECIDE) { 3246 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3247 } else { 3248 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3249 } 3250 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3251 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3252 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3253 3254 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3255 B_mpi->assembled = PETSC_FALSE; 3256 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3257 merge->bi = bi; 3258 merge->bj = bj; 3259 merge->buf_ri = buf_ri; 3260 merge->buf_rj = buf_rj; 3261 merge->coi = PETSC_NULL; 3262 merge->coj = PETSC_NULL; 3263 merge->owners_co = PETSC_NULL; 3264 3265 /* attach the supporting struct to B_mpi for reuse */ 3266 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3267 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3268 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3269 *mpimat = B_mpi; 3270 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3271 PetscFunctionReturn(0); 3272 } 3273 3274 static PetscEvent logkey_seqstompi = 0; 3275 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3276 { 3277 PetscErrorCode ierr; 3278 3279 PetscFunctionBegin; 3280 if (!logkey_seqstompi) { 3281 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3282 } 3283 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3284 if (scall == MAT_INITIAL_MATRIX){ 3285 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3286 } 3287 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3288 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3289 PetscFunctionReturn(0); 3290 } 3291 static PetscEvent logkey_getlocalmat = 0; 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "MatGetLocalMat" 3294 /*@C 3295 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3296 3297 Not Collective 3298 3299 Input Parameters: 3300 + A - the matrix 3301 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3302 3303 Output Parameter: 3304 . A_loc - the local sequential matrix generated 3305 3306 Level: developer 3307 3308 @*/ 3309 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3310 { 3311 PetscErrorCode ierr; 3312 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3313 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3314 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3315 PetscScalar *aa=a->a,*ba=b->a,*ca; 3316 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3317 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3318 3319 PetscFunctionBegin; 3320 if (!logkey_getlocalmat) { 3321 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3322 } 3323 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3324 if (scall == MAT_INITIAL_MATRIX){ 3325 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3326 ci[0] = 0; 3327 for (i=0; i<am; i++){ 3328 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3329 } 3330 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3331 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3332 k = 0; 3333 for (i=0; i<am; i++) { 3334 ncols_o = bi[i+1] - bi[i]; 3335 ncols_d = ai[i+1] - ai[i]; 3336 /* off-diagonal portion of A */ 3337 for (jo=0; jo<ncols_o; jo++) { 3338 col = cmap[*bj]; 3339 if (col >= cstart) break; 3340 cj[k] = col; bj++; 3341 ca[k++] = *ba++; 3342 } 3343 /* diagonal portion of A */ 3344 for (j=0; j<ncols_d; j++) { 3345 cj[k] = cstart + *aj++; 3346 ca[k++] = *aa++; 3347 } 3348 /* off-diagonal portion of A */ 3349 for (j=jo; j<ncols_o; j++) { 3350 cj[k] = cmap[*bj++]; 3351 ca[k++] = *ba++; 3352 } 3353 } 3354 /* put together the new matrix */ 3355 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3356 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3357 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3358 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3359 mat->freedata = PETSC_TRUE; 3360 mat->nonew = 0; 3361 } else if (scall == MAT_REUSE_MATRIX){ 3362 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3363 ci = mat->i; cj = mat->j; ca = mat->a; 3364 for (i=0; i<am; i++) { 3365 /* off-diagonal portion of A */ 3366 ncols_o = bi[i+1] - bi[i]; 3367 for (jo=0; jo<ncols_o; jo++) { 3368 col = cmap[*bj]; 3369 if (col >= cstart) break; 3370 *ca++ = *ba++; bj++; 3371 } 3372 /* diagonal portion of A */ 3373 ncols_d = ai[i+1] - ai[i]; 3374 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3375 /* off-diagonal portion of A */ 3376 for (j=jo; j<ncols_o; j++) { 3377 *ca++ = *ba++; bj++; 3378 } 3379 } 3380 } else { 3381 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3382 } 3383 3384 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3385 PetscFunctionReturn(0); 3386 } 3387 3388 static PetscEvent logkey_getlocalmatcondensed = 0; 3389 #undef __FUNCT__ 3390 #define __FUNCT__ "MatGetLocalMatCondensed" 3391 /*@C 3392 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3393 3394 Not Collective 3395 3396 Input Parameters: 3397 + A - the matrix 3398 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3399 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3400 3401 Output Parameter: 3402 . A_loc - the local sequential matrix generated 3403 3404 Level: developer 3405 3406 @*/ 3407 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3408 { 3409 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3410 PetscErrorCode ierr; 3411 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3412 IS isrowa,iscola; 3413 Mat *aloc; 3414 3415 PetscFunctionBegin; 3416 if (!logkey_getlocalmatcondensed) { 3417 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3418 } 3419 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3420 if (!row){ 3421 start = a->rstart; end = a->rend; 3422 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3423 } else { 3424 isrowa = *row; 3425 } 3426 if (!col){ 3427 start = a->cstart; 3428 cmap = a->garray; 3429 nzA = a->A->n; 3430 nzB = a->B->n; 3431 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3432 ncols = 0; 3433 for (i=0; i<nzB; i++) { 3434 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3435 else break; 3436 } 3437 imark = i; 3438 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3439 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3440 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3441 ierr = PetscFree(idx);CHKERRQ(ierr); 3442 } else { 3443 iscola = *col; 3444 } 3445 if (scall != MAT_INITIAL_MATRIX){ 3446 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3447 aloc[0] = *A_loc; 3448 } 3449 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3450 *A_loc = aloc[0]; 3451 ierr = PetscFree(aloc);CHKERRQ(ierr); 3452 if (!row){ 3453 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3454 } 3455 if (!col){ 3456 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3457 } 3458 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3459 PetscFunctionReturn(0); 3460 } 3461 3462 static PetscEvent logkey_GetBrowsOfAcols = 0; 3463 #undef __FUNCT__ 3464 #define __FUNCT__ "MatGetBrowsOfAcols" 3465 /*@C 3466 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3467 3468 Collective on Mat 3469 3470 Input Parameters: 3471 + A,B - the matrices in mpiaij format 3472 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3473 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3474 3475 Output Parameter: 3476 + rowb, colb - index sets of rows and columns of B to extract 3477 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3478 - B_seq - the sequential matrix generated 3479 3480 Level: developer 3481 3482 @*/ 3483 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3484 { 3485 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3486 PetscErrorCode ierr; 3487 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3488 IS isrowb,iscolb; 3489 Mat *bseq; 3490 3491 PetscFunctionBegin; 3492 if (a->cstart != b->rstart || a->cend != b->rend){ 3493 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3494 } 3495 if (!logkey_GetBrowsOfAcols) { 3496 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3497 } 3498 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3499 3500 if (scall == MAT_INITIAL_MATRIX){ 3501 start = a->cstart; 3502 cmap = a->garray; 3503 nzA = a->A->n; 3504 nzB = a->B->n; 3505 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3506 ncols = 0; 3507 for (i=0; i<nzB; i++) { /* row < local row index */ 3508 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3509 else break; 3510 } 3511 imark = i; 3512 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3513 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3514 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3515 ierr = PetscFree(idx);CHKERRQ(ierr); 3516 *brstart = imark; 3517 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3518 } else { 3519 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3520 isrowb = *rowb; iscolb = *colb; 3521 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3522 bseq[0] = *B_seq; 3523 } 3524 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3525 *B_seq = bseq[0]; 3526 ierr = PetscFree(bseq);CHKERRQ(ierr); 3527 if (!rowb){ 3528 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3529 } else { 3530 *rowb = isrowb; 3531 } 3532 if (!colb){ 3533 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3534 } else { 3535 *colb = iscolb; 3536 } 3537 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3538 PetscFunctionReturn(0); 3539 } 3540 3541 static PetscEvent logkey_GetBrowsOfAocols = 0; 3542 #undef __FUNCT__ 3543 #define __FUNCT__ "MatGetBrowsOfAoCols" 3544 /*@C 3545 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3546 of the OFF-DIAGONAL portion of local A 3547 3548 Collective on Mat 3549 3550 Input Parameters: 3551 + A,B - the matrices in mpiaij format 3552 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3553 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3554 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3555 3556 Output Parameter: 3557 + B_oth - the sequential matrix generated 3558 3559 Level: developer 3560 3561 @*/ 3562 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3563 { 3564 VecScatter_MPI_General *gen_to,*gen_from; 3565 PetscErrorCode ierr; 3566 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3567 Mat_SeqAIJ *b_oth; 3568 VecScatter ctx=a->Mvctx; 3569 MPI_Comm comm=ctx->comm; 3570 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3571 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3572 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3573 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3574 MPI_Request *rwaits,*swaits; 3575 MPI_Status *sstatus,rstatus; 3576 PetscInt *cols; 3577 PetscScalar *vals; 3578 PetscMPIInt j; 3579 3580 PetscFunctionBegin; 3581 if (a->cstart != b->rstart || a->cend != b->rend){ 3582 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3583 } 3584 if (!logkey_GetBrowsOfAocols) { 3585 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3586 } 3587 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3588 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3589 3590 gen_to = (VecScatter_MPI_General*)ctx->todata; 3591 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3592 rvalues = gen_from->values; /* holds the length of sending row */ 3593 svalues = gen_to->values; /* holds the length of receiving row */ 3594 nrecvs = gen_from->n; 3595 nsends = gen_to->n; 3596 rwaits = gen_from->requests; 3597 swaits = gen_to->requests; 3598 srow = gen_to->indices; /* local row index to be sent */ 3599 rstarts = gen_from->starts; 3600 sstarts = gen_to->starts; 3601 rprocs = gen_from->procs; 3602 sprocs = gen_to->procs; 3603 sstatus = gen_to->sstatus; 3604 3605 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3606 if (scall == MAT_INITIAL_MATRIX){ 3607 /* i-array */ 3608 /*---------*/ 3609 /* post receives */ 3610 for (i=0; i<nrecvs; i++){ 3611 rowlen = (PetscInt*)rvalues + rstarts[i]; 3612 nrows = rstarts[i+1]-rstarts[i]; 3613 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3614 } 3615 3616 /* pack the outgoing message */ 3617 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3618 rstartsj = sstartsj + nsends +1; 3619 sstartsj[0] = 0; rstartsj[0] = 0; 3620 len = 0; /* total length of j or a array to be sent */ 3621 k = 0; 3622 for (i=0; i<nsends; i++){ 3623 rowlen = (PetscInt*)svalues + sstarts[i]; 3624 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3625 for (j=0; j<nrows; j++) { 3626 row = srow[k] + b->rowners[rank]; /* global row idx */ 3627 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3628 len += rowlen[j]; 3629 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3630 k++; 3631 } 3632 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3633 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3634 } 3635 /* recvs and sends of i-array are completed */ 3636 i = nrecvs; 3637 while (i--) { 3638 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3639 } 3640 if (nsends) { 3641 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3642 } 3643 /* allocate buffers for sending j and a arrays */ 3644 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3645 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3646 3647 /* create i-array of B_oth */ 3648 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3649 b_othi[0] = 0; 3650 len = 0; /* total length of j or a array to be received */ 3651 k = 0; 3652 for (i=0; i<nrecvs; i++){ 3653 rowlen = (PetscInt*)rvalues + rstarts[i]; 3654 nrows = rstarts[i+1]-rstarts[i]; 3655 for (j=0; j<nrows; j++) { 3656 b_othi[k+1] = b_othi[k] + rowlen[j]; 3657 len += rowlen[j]; k++; 3658 } 3659 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3660 } 3661 3662 /* allocate space for j and a arrrays of B_oth */ 3663 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3664 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3665 3666 /* j-array */ 3667 /*---------*/ 3668 /* post receives of j-array */ 3669 for (i=0; i<nrecvs; i++){ 3670 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3671 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3672 } 3673 k = 0; 3674 for (i=0; i<nsends; i++){ 3675 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3676 bufJ = bufj+sstartsj[i]; 3677 for (j=0; j<nrows; j++) { 3678 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3679 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3680 for (l=0; l<ncols; l++){ 3681 *bufJ++ = cols[l]; 3682 } 3683 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3684 } 3685 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3686 } 3687 3688 /* recvs and sends of j-array are completed */ 3689 i = nrecvs; 3690 while (i--) { 3691 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3692 } 3693 if (nsends) { 3694 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3695 } 3696 } else if (scall == MAT_REUSE_MATRIX){ 3697 sstartsj = *startsj; 3698 rstartsj = sstartsj + nsends +1; 3699 bufa = *bufa_ptr; 3700 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3701 b_otha = b_oth->a; 3702 } else { 3703 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3704 } 3705 3706 /* a-array */ 3707 /*---------*/ 3708 /* post receives of a-array */ 3709 for (i=0; i<nrecvs; i++){ 3710 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3711 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3712 } 3713 k = 0; 3714 for (i=0; i<nsends; i++){ 3715 nrows = sstarts[i+1]-sstarts[i]; 3716 bufA = bufa+sstartsj[i]; 3717 for (j=0; j<nrows; j++) { 3718 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3719 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3720 for (l=0; l<ncols; l++){ 3721 *bufA++ = vals[l]; 3722 } 3723 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3724 3725 } 3726 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3727 } 3728 /* recvs and sends of a-array are completed */ 3729 i = nrecvs; 3730 while (i--) { 3731 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3732 } 3733 if (nsends) { 3734 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3735 } 3736 3737 if (scall == MAT_INITIAL_MATRIX){ 3738 /* put together the new matrix */ 3739 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3740 3741 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3742 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3743 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3744 b_oth->freedata = PETSC_TRUE; 3745 b_oth->nonew = 0; 3746 3747 ierr = PetscFree(bufj);CHKERRQ(ierr); 3748 if (!startsj || !bufa_ptr){ 3749 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3750 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3751 } else { 3752 *startsj = sstartsj; 3753 *bufa_ptr = bufa; 3754 } 3755 } 3756 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3757 3758 PetscFunctionReturn(0); 3759 } 3760 3761 /*MC 3762 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3763 3764 Options Database Keys: 3765 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3766 3767 Level: beginner 3768 3769 .seealso: MatCreateMPIAIJ 3770 M*/ 3771 3772 EXTERN_C_BEGIN 3773 #undef __FUNCT__ 3774 #define __FUNCT__ "MatCreate_MPIAIJ" 3775 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3776 { 3777 Mat_MPIAIJ *b; 3778 PetscErrorCode ierr; 3779 PetscInt i; 3780 PetscMPIInt size; 3781 3782 PetscFunctionBegin; 3783 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3784 3785 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3786 B->data = (void*)b; 3787 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3788 B->factor = 0; 3789 B->bs = 1; 3790 B->assembled = PETSC_FALSE; 3791 B->mapping = 0; 3792 3793 B->insertmode = NOT_SET_VALUES; 3794 b->size = size; 3795 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3796 3797 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3798 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3799 3800 /* the information in the maps duplicates the information computed below, eventually 3801 we should remove the duplicate information that is not contained in the maps */ 3802 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3803 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3804 3805 /* build local table of row and column ownerships */ 3806 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3807 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3808 b->cowners = b->rowners + b->size + 2; 3809 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3810 b->rowners[0] = 0; 3811 for (i=2; i<=b->size; i++) { 3812 b->rowners[i] += b->rowners[i-1]; 3813 } 3814 b->rstart = b->rowners[b->rank]; 3815 b->rend = b->rowners[b->rank+1]; 3816 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3817 b->cowners[0] = 0; 3818 for (i=2; i<=b->size; i++) { 3819 b->cowners[i] += b->cowners[i-1]; 3820 } 3821 b->cstart = b->cowners[b->rank]; 3822 b->cend = b->cowners[b->rank+1]; 3823 3824 /* build cache for off array entries formed */ 3825 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3826 b->donotstash = PETSC_FALSE; 3827 b->colmap = 0; 3828 b->garray = 0; 3829 b->roworiented = PETSC_TRUE; 3830 3831 /* stuff used for matrix vector multiply */ 3832 b->lvec = PETSC_NULL; 3833 b->Mvctx = PETSC_NULL; 3834 3835 /* stuff for MatGetRow() */ 3836 b->rowindices = 0; 3837 b->rowvalues = 0; 3838 b->getrowactive = PETSC_FALSE; 3839 3840 /* Explicitly create 2 MATSEQAIJ matrices. */ 3841 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3842 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3843 PetscLogObjectParent(B,b->A); 3844 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3845 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3846 PetscLogObjectParent(B,b->B); 3847 3848 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3849 "MatStoreValues_MPIAIJ", 3850 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3851 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3852 "MatRetrieveValues_MPIAIJ", 3853 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3854 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3855 "MatGetDiagonalBlock_MPIAIJ", 3856 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3857 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3858 "MatIsTranspose_MPIAIJ", 3859 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3861 "MatMPIAIJSetPreallocation_MPIAIJ", 3862 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3863 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3864 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3865 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3866 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3867 "MatDiagonalScaleLocal_MPIAIJ", 3868 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3869 PetscFunctionReturn(0); 3870 } 3871 EXTERN_C_END 3872