#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable
   at a slightly higher hash table cost; without it, it is not scalable
   (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
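/*
   Illustrative sketch (not part of the original source): how the colmap built above
   is typically consulted. Assuming gcol is a global column number that belongs to
   the off-diagonal block B, the lookup below recovers the local column index lcol
   (negative if gcol is not present). The +1/-1 offsets mirror the PetscTableAdd()
   calls above, where a stored value of 0 means "not found".

       PetscInt lcol;
     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below use exactly this pattern.
*/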
#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ
*aij = (Mat_MPIAIJ*)mat->data; 191 PetscScalar value; 192 PetscErrorCode ierr; 193 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 194 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 195 PetscTruth roworiented = aij->roworiented; 196 197 /* Some Variables required in the macro */ 198 Mat A = aij->A; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 200 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 201 PetscScalar *aa = a->a; 202 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 203 Mat B = aij->B; 204 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 205 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 206 PetscScalar *ba = b->a; 207 208 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 209 PetscInt nonew = a->nonew,shift=0; 210 PetscScalar *ap; 211 212 PetscFunctionBegin; 213 for (i=0; i<m; i++) { 214 if (im[i] < 0) continue; 215 #if defined(PETSC_USE_DEBUG) 216 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 217 #endif 218 if (im[i] >= rstart && im[i] < rend) { 219 row = im[i] - rstart; 220 for (j=0; j<n; j++) { 221 if (in[j] >= cstart && in[j] < cend){ 222 col = in[j] - cstart; 223 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 224 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 225 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 226 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 227 } else if (in[j] < 0) continue; 228 #if defined(PETSC_USE_DEBUG) 229 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 230 #endif 231 else { 232 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 234 if (mat->was_assembled) { 235 if (!aij->colmap) { 236 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 240 col--; 241 #else 242 col = aij->colmap[in[j]] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 245 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 248 B = aij->B; 249 b = (Mat_SeqAIJ*)B->data; 250 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 251 ba = b->a; 252 } 253 } else col = in[j]; 254 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (!aij->donotstash) { 260 if (roworiented) { 261 if (ignorezeroentries && v[i*n] == 0.0) continue; 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 263 } else { 264 if (ignorezeroentries && v[i] == 0.0) continue; 265 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatGetValues_MPIAIJ" 275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 280 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 281 282 PetscFunctionBegin; 283 for (i=0; i<m; i++) { 284 if (idxm[i] 
< 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 285 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 286 if (idxm[i] >= rstart && idxm[i] < rend) { 287 row = idxm[i] - rstart; 288 for (j=0; j<n; j++) { 289 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 290 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 291 if (idxn[j] >= cstart && idxn[j] < cend){ 292 col = idxn[j] - cstart; 293 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 294 } else { 295 if (!aij->colmap) { 296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 297 } 298 #if defined (PETSC_USE_CTABLE) 299 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 300 col --; 301 #else 302 col = aij->colmap[idxn[j]] - 1; 303 #endif 304 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 305 else { 306 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 307 } 308 } 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 320 { 321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 322 PetscErrorCode ierr; 323 PetscInt nstash,reallocs; 324 InsertMode addv; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 PetscFunctionReturn(0); 329 } 330 331 /* make sure all processors are either in INSERTMODE or ADDMODE */ 332 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 333 if (addv == (ADD_VALUES|INSERT_VALUES)) { 334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 335 } 336 mat->insertmode = addv; /* in case this processor had no cache */ 337 338 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 339 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 340 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 347 { 348 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 349 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 350 PetscErrorCode ierr; 351 PetscMPIInt n; 352 PetscInt i,j,rstart,ncols,flg; 353 PetscInt *row,*col,other_disassembled; 354 PetscScalar *val; 355 InsertMode addv = mat->insertmode; 356 357 PetscFunctionBegin; 358 if (!aij->donotstash) { 359 while (1) { 360 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 361 if (!flg) break; 362 363 for (i=0; i<n;) { 364 /* Now identify the consecutive vals belonging to the same row */ 365 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 366 if (j < n) ncols = j-i; 367 else ncols = n-i; 368 /* Now assemble all these values with a single function call */ 369 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 370 i = j; 371 } 372 } 373 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 374 } 375 a->compressedrow.use = PETSC_FALSE; 376 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 377 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 378 379 /* determine if any processor has disassembled, if so we 
must also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess the b because aij->B was changed */
      b = (Mat_SeqAIJ *)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }

  b->inode.use         = PETSC_FALSE;
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus calling lots of mallocs and slowing things down.
540 541 Contributed by: Mathew Knepley 542 */ 543 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 544 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 545 if (diag && (l->A->M == l->A->N)) { 546 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 547 } else if (diag) { 548 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 549 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 550 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 552 } 553 for (i = 0; i < slen; i++) { 554 row = lrows[i] + rstart; 555 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 556 } 557 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 } else { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 } 562 ierr = ISDestroy(istmp);CHKERRQ(ierr); 563 ierr = PetscFree(lrows);CHKERRQ(ierr); 564 565 /* wait on sends */ 566 if (nsends) { 567 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 568 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 569 ierr = PetscFree(send_status);CHKERRQ(ierr); 570 } 571 ierr = PetscFree(send_waits);CHKERRQ(ierr); 572 ierr = PetscFree(svalues);CHKERRQ(ierr); 573 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatMult_MPIAIJ" 579 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 580 { 581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 582 PetscErrorCode ierr; 583 PetscInt nt; 584 585 PetscFunctionBegin; 586 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 587 if (nt != A->n) { 588 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 589 } 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultAdd_MPIAIJ" 599 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 PetscErrorCode ierr; 603 604 PetscFunctionBegin; 605 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 606 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 607 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 608 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 614 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 615 { 616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 617 PetscErrorCode ierr; 618 619 PetscFunctionBegin; 620 /* do nondiagonal part */ 621 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 622 /* send it on its way */ 623 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 /* do local part */ 625 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 626 /* receive remote parts: note this assumes the values are not actually */ 627 /* inserted in yy until the next line, which is true for my implementation*/ 628 /* but is not perhaps always true. 
*/ 629 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 EXTERN_C_BEGIN 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 636 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 637 { 638 MPI_Comm comm; 639 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 640 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 641 IS Me,Notme; 642 PetscErrorCode ierr; 643 PetscInt M,N,first,last,*notme,i; 644 PetscMPIInt size; 645 646 PetscFunctionBegin; 647 648 /* Easy test: symmetric diagonal block */ 649 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 650 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 651 if (!*f) PetscFunctionReturn(0); 652 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 653 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 654 if (size == 1) PetscFunctionReturn(0); 655 656 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 657 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 658 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 659 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 660 for (i=0; i<first; i++) notme[i] = i; 661 for (i=last; i<M; i++) notme[i-last+first] = i; 662 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 663 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 664 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 665 Aoff = Aoffs[0]; 666 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 667 Boff = Boffs[0]; 668 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 669 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 670 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 671 ierr = ISDestroy(Me);CHKERRQ(ierr); 672 ierr = ISDestroy(Notme);CHKERRQ(ierr); 673 674 PetscFunctionReturn(0); 675 } 676 EXTERN_C_END 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 680 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 681 { 682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 683 PetscErrorCode ierr; 684 685 PetscFunctionBegin; 686 /* do nondiagonal part */ 687 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 688 /* send it on its way */ 689 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 690 /* do local part */ 691 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 692 /* receive remote parts: note this assumes the values are not actually */ 693 /* inserted in yy until the next line, which is true for my implementation*/ 694 /* but is not perhaps always true. 
*/ 695 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } 698 699 /* 700 This only works correctly for square matrices where the subblock A->A is the 701 diagonal block 702 */ 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 705 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 706 { 707 PetscErrorCode ierr; 708 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 709 710 PetscFunctionBegin; 711 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 712 if (a->rstart != a->cstart || a->rend != a->cend) { 713 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 714 } 715 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 #undef __FUNCT__ 720 #define __FUNCT__ "MatScale_MPIAIJ" 721 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 722 { 723 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 724 PetscErrorCode ierr; 725 726 PetscFunctionBegin; 727 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 728 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatDestroy_MPIAIJ" 734 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 735 { 736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 #if defined(PETSC_USE_LOG) 741 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 742 #endif 743 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 744 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 745 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 746 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 747 #if defined (PETSC_USE_CTABLE) 748 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 749 #else 750 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 751 #endif 752 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 753 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 754 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 755 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 756 ierr = PetscFree(aij);CHKERRQ(ierr); 757 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 762 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 763 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 764 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatView_MPIAIJ_Binary" 770 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 771 { 772 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 773 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 774 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 775 PetscErrorCode ierr; 776 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 777 int fd; 778 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 779 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 780 PetscScalar 
*column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (;
j<B->i[i+1]; j++) { 871 column_values[cnt++] = B->a[j]; 872 } 873 } 874 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 875 876 /* store the column values to the file */ 877 if (!rank) { 878 MPI_Status status; 879 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 880 for (i=1; i<size; i++) { 881 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 882 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 883 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 884 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 885 } 886 } else { 887 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 888 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 889 } 890 ierr = PetscFree(column_values);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 896 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 897 { 898 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 899 PetscErrorCode ierr; 900 PetscMPIInt rank = aij->rank,size = aij->size; 901 PetscTruth isdraw,iascii,isbinary; 902 PetscViewer sviewer; 903 PetscViewerFormat format; 904 905 PetscFunctionBegin; 906 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 907 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 908 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 909 if (iascii) { 910 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 911 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 912 MatInfo info; 913 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 914 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 915 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 916 if (!a->inode.size) { 917 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 918 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 919 } else { 920 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 921 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 922 } 923 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 924 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 925 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 926 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 927 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 928 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } else if (format == PETSC_VIEWER_ASCII_INFO) { 931 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 932 if (a->inode.size) { 933 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr); 934 } else { 935 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 936 } 937 PetscFunctionReturn(0); 938 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 939 
PetscFunctionReturn(0); 940 } 941 } else if (isbinary) { 942 if (size == 1) { 943 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 944 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 945 } else { 946 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } else if (isdraw) { 950 PetscDraw draw; 951 PetscTruth isnull; 952 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 953 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 954 } 955 956 if (size == 1) { 957 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 958 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 959 } else { 960 /* assemble the entire matrix onto first processor. */ 961 Mat A; 962 Mat_SeqAIJ *Aloc; 963 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 964 PetscScalar *a; 965 966 if (!rank) { 967 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 968 } else { 969 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 970 } 971 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 972 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 973 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 974 PetscLogObjectParent(mat,A); 975 976 /* copy over the A part */ 977 Aloc = (Mat_SeqAIJ*)aij->A->data; 978 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 979 row = aij->rstart; 980 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 981 for (i=0; i<m; i++) { 982 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 983 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 984 } 985 aj = Aloc->j; 986 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 987 988 /* copy over the B part */ 989 Aloc = (Mat_SeqAIJ*)aij->B->data; 990 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 991 row = aij->rstart; 992 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 993 ct = cols; 994 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 995 for (i=0; i<m; i++) { 996 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 997 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 998 } 999 ierr = PetscFree(ct);CHKERRQ(ierr); 1000 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1001 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1002 /* 1003 Everyone has to call to draw the matrix since the graphics waits are 1004 synchronized across all processors that share the PetscDraw object 1005 */ 1006 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1007 if (!rank) { 1008 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1009 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1010 } 1011 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1012 ierr = MatDestroy(A);CHKERRQ(ierr); 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatView_MPIAIJ" 1019 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1020 { 1021 PetscErrorCode ierr; 1022 PetscTruth iascii,isdraw,issocket,isbinary; 1023 1024 PetscFunctionBegin; 1025 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1026 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1027 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1028 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1029 if (iascii || isdraw || isbinary || issocket) { 1030 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1031 } else { 1032 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 1037 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatRelax_MPIAIJ" 1041 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1042 { 1043 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1044 PetscErrorCode ierr; 1045 Vec bb1; 1046 PetscScalar mone=-1.0; 1047 1048 PetscFunctionBegin; 1049 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1050 1051 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1052 1053 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1054 if (flag & SOR_ZERO_INITIAL_GUESS) { 1055 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1056 its--; 1057 } 1058 1059 while (its--) { 1060 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1061 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1062 1063 /* update rhs: bb1 = bb - B*x */ 1064 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1065 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1066 1067 /* local sweep */ 1068 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1069 CHKERRQ(ierr); 1070 } 1071 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1072 if (flag & SOR_ZERO_INITIAL_GUESS) { 1073 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1074 its--; 1075 } 1076 while (its--) { 1077 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1078 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1079 1080 /* update rhs: bb1 = bb - B*x */ 1081 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1082 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1083 1084 /* local sweep */ 1085 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1086 CHKERRQ(ierr); 1087 } 1088 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1089 if (flag & SOR_ZERO_INITIAL_GUESS) { 1090 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1091 its--; 1092 } 1093 while (its--) { 1094 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1096 1097 /* update rhs: bb1 = bb - B*x */ 1098 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1099 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1100 1101 /* local sweep */ 1102 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1103 CHKERRQ(ierr); 1104 } 1105 } else { 1106 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1107 } 1108 1109 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1115 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1116 { 1117 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1118 Mat A = mat->A,B = mat->B; 1119 PetscErrorCode ierr; 1120 PetscReal isend[5],irecv[5]; 1121 1122 PetscFunctionBegin; 1123 info->block_size = 1.0; 1124 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1125 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1126 isend[3] = info->memory; isend[4] = info->mallocs; 1127 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1128 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1129 isend[3] += info->memory; isend[4] += info->mallocs; 1130 if (flag == MAT_LOCAL) { 1131 info->nz_used = isend[0]; 1132 info->nz_allocated = isend[1]; 1133 info->nz_unneeded = isend[2]; 1134 info->memory = isend[3]; 1135 info->mallocs = isend[4]; 1136 } else if (flag == MAT_GLOBAL_MAX) { 1137 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1138 info->nz_used = irecv[0]; 1139 info->nz_allocated = irecv[1]; 1140 info->nz_unneeded = irecv[2]; 1141 info->memory = irecv[3]; 1142 info->mallocs = irecv[4]; 1143 } else if (flag == MAT_GLOBAL_SUM) { 1144 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1145 info->nz_used = irecv[0]; 1146 info->nz_allocated = irecv[1]; 1147 info->nz_unneeded = irecv[2]; 1148 info->memory = irecv[3]; 1149 info->mallocs = irecv[4]; 1150 } 1151 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1152 info->fill_ratio_needed = 0; 1153 info->factor_mallocs = 0; 1154 info->rows_global = (double)matin->M; 1155 info->columns_global = (double)matin->N; 1156 info->rows_local = (double)matin->m; 1157 info->columns_local = (double)matin->N; 1158 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatSetOption_MPIAIJ" 1164 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 switch (op) { 1171 case MAT_NO_NEW_NONZERO_LOCATIONS: 1172 case MAT_YES_NEW_NONZERO_LOCATIONS: 1173 case MAT_COLUMNS_UNSORTED: 1174 case MAT_COLUMNS_SORTED: 1175 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1176 case MAT_KEEP_ZEROED_ROWS: 1177 case MAT_NEW_NONZERO_LOCATION_ERR: 1178 case MAT_USE_INODES: 1179 case MAT_DO_NOT_USE_INODES: 1180 case MAT_IGNORE_ZERO_ENTRIES: 1181 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1182 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1183 break; 1184 case MAT_ROW_ORIENTED: 1185 a->roworiented = PETSC_TRUE; 1186 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1187 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1188 break; 1189 case MAT_ROWS_SORTED: 1190 case MAT_ROWS_UNSORTED: 1191 case MAT_YES_NEW_DIAGONALS: 1192 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1193 break; 1194 case MAT_COLUMN_ORIENTED: 1195 a->roworiented = PETSC_FALSE; 1196 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1197 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1198 break; 1199 case MAT_IGNORE_OFF_PROC_ENTRIES: 1200 a->donotstash = PETSC_TRUE; 1201 break; 1202 case MAT_NO_NEW_DIAGONALS: 1203 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1204 case MAT_SYMMETRIC: 1205 case MAT_STRUCTURALLY_SYMMETRIC: 1206 case MAT_HERMITIAN: 1207 case MAT_SYMMETRY_ETERNAL: 1208 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1209 break; 1210 case MAT_NOT_SYMMETRIC: 1211 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1212 case MAT_NOT_HERMITIAN: 1213 case MAT_NOT_SYMMETRY_ETERNAL: 1214 break; 1215 default: 1216 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1217 } 1218 PetscFunctionReturn(0); 1219 } 1220 
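/*
   Illustrative usage sketch (not part of the original source): MatGetRow_MPIAIJ()
   below only serves rows owned by the calling process, and every MatGetRow() must be
   paired with MatRestoreRow() before the next row is requested.  A typical caller
   therefore loops over the local ownership range, e.g.

       PetscInt    row,rstart,rend,ncols;
       PetscInt    *cols;
       PetscScalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ...use ncols, cols[] and vals[] here...
         ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }

   The returned arrays point into the rowvalues/rowindices workspace allocated in
   MatGetRow_MPIAIJ(), so they are only valid until MatRestoreRow() is called.
*/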
1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatGetRow_MPIAIJ" 1223 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1224 { 1225 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1226 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1227 PetscErrorCode ierr; 1228 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1229 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1230 PetscInt *cmap,*idx_p; 1231 1232 PetscFunctionBegin; 1233 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1234 mat->getrowactive = PETSC_TRUE; 1235 1236 if (!mat->rowvalues && (idx || v)) { 1237 /* 1238 allocate enough space to hold information from the longest row. 1239 */ 1240 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1241 PetscInt max = 1,tmp; 1242 for (i=0; i<matin->m; i++) { 1243 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1244 if (max < tmp) { max = tmp; } 1245 } 1246 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1247 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1248 } 1249 1250 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1251 lrow = row - rstart; 1252 1253 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1254 if (!v) {pvA = 0; pvB = 0;} 1255 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1256 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1257 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1258 nztot = nzA + nzB; 1259 1260 cmap = mat->garray; 1261 if (v || idx) { 1262 if (nztot) { 1263 /* Sort by increasing column numbers, assuming A and B already sorted */ 1264 PetscInt imark = -1; 1265 if (v) { 1266 *v = v_p = mat->rowvalues; 1267 for (i=0; i<nzB; i++) { 1268 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1269 else break; 1270 } 1271 imark = i; 1272 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1273 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1274 } 1275 if (idx) { 1276 *idx = idx_p = mat->rowindices; 1277 if (imark > -1) { 1278 for (i=0; i<imark; i++) { 1279 idx_p[i] = cmap[cworkB[i]]; 1280 } 1281 } else { 1282 for (i=0; i<nzB; i++) { 1283 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1284 else break; 1285 } 1286 imark = i; 1287 } 1288 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1289 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1290 } 1291 } else { 1292 if (idx) *idx = 0; 1293 if (v) *v = 0; 1294 } 1295 } 1296 *nz = nztot; 1297 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1298 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1304 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1305 { 1306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1307 1308 PetscFunctionBegin; 1309 if (!aij->getrowactive) { 1310 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1311 } 1312 aij->getrowactive = PETSC_FALSE; 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatNorm_MPIAIJ" 1318 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1319 { 1320 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1321 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1322 PetscErrorCode ierr; 1323 PetscInt 
i,j,cstart = aij->cstart; 1324 PetscReal sum = 0.0; 1325 PetscScalar *v; 1326 1327 PetscFunctionBegin; 1328 if (aij->size == 1) { 1329 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1330 } else { 1331 if (type == NORM_FROBENIUS) { 1332 v = amat->a; 1333 for (i=0; i<amat->nz; i++) { 1334 #if defined(PETSC_USE_COMPLEX) 1335 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1336 #else 1337 sum += (*v)*(*v); v++; 1338 #endif 1339 } 1340 v = bmat->a; 1341 for (i=0; i<bmat->nz; i++) { 1342 #if defined(PETSC_USE_COMPLEX) 1343 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1344 #else 1345 sum += (*v)*(*v); v++; 1346 #endif 1347 } 1348 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1349 *norm = sqrt(*norm); 1350 } else if (type == NORM_1) { /* max column norm */ 1351 PetscReal *tmp,*tmp2; 1352 PetscInt *jj,*garray = aij->garray; 1353 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1354 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1355 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1356 *norm = 0.0; 1357 v = amat->a; jj = amat->j; 1358 for (j=0; j<amat->nz; j++) { 1359 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1360 } 1361 v = bmat->a; jj = bmat->j; 1362 for (j=0; j<bmat->nz; j++) { 1363 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1364 } 1365 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1366 for (j=0; j<mat->N; j++) { 1367 if (tmp2[j] > *norm) *norm = tmp2[j]; 1368 } 1369 ierr = PetscFree(tmp);CHKERRQ(ierr); 1370 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1371 } else if (type == NORM_INFINITY) { /* max row norm */ 1372 PetscReal ntemp = 0.0; 1373 for (j=0; j<aij->A->m; j++) { 1374 v = amat->a + amat->i[j]; 1375 sum = 0.0; 1376 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1377 sum += PetscAbsScalar(*v); v++; 1378 } 1379 v = bmat->a + bmat->i[j]; 1380 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1381 sum += PetscAbsScalar(*v); v++; 1382 } 1383 if (sum > ntemp) ntemp = sum; 1384 } 1385 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1386 } else { 1387 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1388 } 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatTranspose_MPIAIJ" 1395 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1396 { 1397 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1398 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1399 PetscErrorCode ierr; 1400 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1401 Mat B; 1402 PetscScalar *array; 1403 1404 PetscFunctionBegin; 1405 if (!matout && M != N) { 1406 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1407 } 1408 1409 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1410 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1411 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1412 1413 /* copy over the A part */ 1414 Aloc = (Mat_SeqAIJ*)a->A->data; 1415 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1416 row = a->rstart; 1417 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1418 for (i=0; i<m; i++) { 1419 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1420 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1421 } 1422 aj = Aloc->j; 1423 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1424 1425 /* copy over the B part */ 1426 Aloc = (Mat_SeqAIJ*)a->B->data; 1427 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1428 row = a->rstart; 1429 ierr = 
PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1430 ct = cols; 1431 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1432 for (i=0; i<m; i++) { 1433 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1434 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1435 } 1436 ierr = PetscFree(ct);CHKERRQ(ierr); 1437 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1438 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1439 if (matout) { 1440 *matout = B; 1441 } else { 1442 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1449 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1450 { 1451 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1452 Mat a = aij->A,b = aij->B; 1453 PetscErrorCode ierr; 1454 PetscInt s1,s2,s3; 1455 1456 PetscFunctionBegin; 1457 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1458 if (rr) { 1459 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1460 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1461 /* Overlap communication with computation. */ 1462 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1463 } 1464 if (ll) { 1465 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1466 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1467 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1468 } 1469 /* scale the diagonal block */ 1470 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1471 1472 if (rr) { 1473 /* Do a scatter end and then right scale the off-diagonal block */ 1474 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1475 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1476 } 1477 1478 PetscFunctionReturn(0); 1479 } 1480 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1484 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1485 { 1486 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1487 PetscErrorCode ierr; 1488 1489 PetscFunctionBegin; 1490 if (!a->rank) { 1491 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1498 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1499 { 1500 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1501 PetscErrorCode ierr; 1502 1503 PetscFunctionBegin; 1504 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1505 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1510 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1511 { 1512 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1513 PetscErrorCode ierr; 1514 1515 PetscFunctionBegin; 1516 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatEqual_MPIAIJ" 1522 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1523 { 1524 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1525 Mat a,b,c,d; 1526 PetscTruth flg; 1527 PetscErrorCode ierr; 1528 1529 PetscFunctionBegin; 1530 a = matA->A; b = matA->B; 1531 c = matB->A; d = matB->B; 1532 1533 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1534 if (flg) { 1535 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1536 } 1537 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1538 
PetscFunctionReturn(0); 1539 } 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatCopy_MPIAIJ" 1543 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1544 { 1545 PetscErrorCode ierr; 1546 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1547 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1548 1549 PetscFunctionBegin; 1550 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1551 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1552 /* because of the column compression in the off-processor part of the matrix a->B, 1553 the number of columns in a->B and b->B may be different, hence we cannot call 1554 the MatCopy() directly on the two parts. If need be, we can provide a more 1555 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1556 then copying the submatrices */ 1557 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1558 } else { 1559 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1560 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1567 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1568 { 1569 PetscErrorCode ierr; 1570 1571 PetscFunctionBegin; 1572 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #include "petscblaslapack.h" 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatAXPY_MPIAIJ" 1579 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1580 { 1581 PetscErrorCode ierr; 1582 PetscInt i; 1583 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1584 PetscBLASInt bnz,one=1; 1585 Mat_SeqAIJ *x,*y; 1586 1587 PetscFunctionBegin; 1588 if (str == SAME_NONZERO_PATTERN) { 1589 x = (Mat_SeqAIJ *)xx->A->data; 1590 y = (Mat_SeqAIJ *)yy->A->data; 1591 bnz = (PetscBLASInt)x->nz; 1592 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1593 x = (Mat_SeqAIJ *)xx->B->data; 1594 y = (Mat_SeqAIJ *)yy->B->data; 1595 bnz = (PetscBLASInt)x->nz; 1596 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1597 } else if (str == SUBSET_NONZERO_PATTERN) { 1598 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1599 1600 x = (Mat_SeqAIJ *)xx->B->data; 1601 y = (Mat_SeqAIJ *)yy->B->data; 1602 if (y->xtoy && y->XtoY != xx->B) { 1603 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1604 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1605 } 1606 if (!y->xtoy) { /* get xtoy */ 1607 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1608 y->XtoY = xx->B; 1609 } 1610 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1611 } else { 1612 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1613 } 1614 PetscFunctionReturn(0); 1615 } 1616 1617 /* -------------------------------------------------------------------*/ 1618 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1619 MatGetRow_MPIAIJ, 1620 MatRestoreRow_MPIAIJ, 1621 MatMult_MPIAIJ, 1622 /* 4*/ MatMultAdd_MPIAIJ, 1623 MatMultTranspose_MPIAIJ, 1624 MatMultTransposeAdd_MPIAIJ, 1625 0, 1626 0, 1627 0, 1628 /*10*/ 0, 1629 0, 1630 0, 1631 MatRelax_MPIAIJ, 1632 MatTranspose_MPIAIJ, 1633 /*15*/ MatGetInfo_MPIAIJ, 1634 MatEqual_MPIAIJ, 1635 MatGetDiagonal_MPIAIJ, 1636 MatDiagonalScale_MPIAIJ, 1637 MatNorm_MPIAIJ, 1638 /*20*/ MatAssemblyBegin_MPIAIJ, 1639 MatAssemblyEnd_MPIAIJ, 1640 0, 1641 MatSetOption_MPIAIJ, 1642 MatZeroEntries_MPIAIJ, 1643 /*25*/ MatZeroRows_MPIAIJ, 1644 0, 
1645 0, 1646 0, 1647 0, 1648 /*30*/ MatSetUpPreallocation_MPIAIJ, 1649 0, 1650 0, 1651 0, 1652 0, 1653 /*35*/ MatDuplicate_MPIAIJ, 1654 0, 1655 0, 1656 0, 1657 0, 1658 /*40*/ MatAXPY_MPIAIJ, 1659 MatGetSubMatrices_MPIAIJ, 1660 MatIncreaseOverlap_MPIAIJ, 1661 MatGetValues_MPIAIJ, 1662 MatCopy_MPIAIJ, 1663 /*45*/ MatPrintHelp_MPIAIJ, 1664 MatScale_MPIAIJ, 1665 0, 1666 0, 1667 0, 1668 /*50*/ MatSetBlockSize_MPIAIJ, 1669 0, 1670 0, 1671 0, 1672 0, 1673 /*55*/ MatFDColoringCreate_MPIAIJ, 1674 0, 1675 MatSetUnfactored_MPIAIJ, 1676 0, 1677 0, 1678 /*60*/ MatGetSubMatrix_MPIAIJ, 1679 MatDestroy_MPIAIJ, 1680 MatView_MPIAIJ, 1681 MatGetPetscMaps_Petsc, 1682 0, 1683 /*65*/ 0, 1684 0, 1685 0, 1686 0, 1687 0, 1688 /*70*/ 0, 1689 0, 1690 MatSetColoring_MPIAIJ, 1691 #if defined(PETSC_HAVE_ADIC) 1692 MatSetValuesAdic_MPIAIJ, 1693 #else 1694 0, 1695 #endif 1696 MatSetValuesAdifor_MPIAIJ, 1697 /*75*/ 0, 1698 0, 1699 0, 1700 0, 1701 0, 1702 /*80*/ 0, 1703 0, 1704 0, 1705 0, 1706 /*84*/ MatLoad_MPIAIJ, 1707 0, 1708 0, 1709 0, 1710 0, 1711 0, 1712 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1713 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1714 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1715 MatPtAP_Basic, 1716 MatPtAPSymbolic_MPIAIJ, 1717 /*95*/ MatPtAPNumeric_MPIAIJ, 1718 0, 1719 0, 1720 0, 1721 0, 1722 /*100*/0, 1723 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1724 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1725 }; 1726 1727 /* ----------------------------------------------------------------------------------------*/ 1728 1729 EXTERN_C_BEGIN 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1732 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1733 { 1734 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1739 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 EXTERN_C_END 1743 1744 EXTERN_C_BEGIN 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1747 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1748 { 1749 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1750 PetscErrorCode ierr; 1751 1752 PetscFunctionBegin; 1753 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1754 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 EXTERN_C_END 1758 1759 #include "petscpc.h" 1760 EXTERN_C_BEGIN 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1763 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1764 { 1765 Mat_MPIAIJ *b; 1766 PetscErrorCode ierr; 1767 PetscInt i; 1768 1769 PetscFunctionBegin; 1770 B->preallocated = PETSC_TRUE; 1771 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1772 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1773 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1774 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1775 if (d_nnz) { 1776 for (i=0; i<B->m; i++) { 1777 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1778 } 1779 } 1780 if (o_nnz) { 1781 for (i=0; i<B->m; i++) { 1782 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1783 } 1784 } 1785 b = (Mat_MPIAIJ*)B->data; 1786 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1787 ierr = 
MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1788 1789 PetscFunctionReturn(0); 1790 } 1791 EXTERN_C_END 1792 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1795 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1796 { 1797 Mat mat; 1798 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1799 PetscErrorCode ierr; 1800 1801 PetscFunctionBegin; 1802 *newmat = 0; 1803 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1804 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1805 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1806 a = (Mat_MPIAIJ*)mat->data; 1807 1808 mat->factor = matin->factor; 1809 mat->bs = matin->bs; 1810 mat->assembled = PETSC_TRUE; 1811 mat->insertmode = NOT_SET_VALUES; 1812 mat->preallocated = PETSC_TRUE; 1813 1814 a->rstart = oldmat->rstart; 1815 a->rend = oldmat->rend; 1816 a->cstart = oldmat->cstart; 1817 a->cend = oldmat->cend; 1818 a->size = oldmat->size; 1819 a->rank = oldmat->rank; 1820 a->donotstash = oldmat->donotstash; 1821 a->roworiented = oldmat->roworiented; 1822 a->rowindices = 0; 1823 a->rowvalues = 0; 1824 a->getrowactive = PETSC_FALSE; 1825 1826 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1827 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1828 if (oldmat->colmap) { 1829 #if defined (PETSC_USE_CTABLE) 1830 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1831 #else 1832 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1833 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1834 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1835 #endif 1836 } else a->colmap = 0; 1837 if (oldmat->garray) { 1838 PetscInt len; 1839 len = oldmat->B->n; 1840 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1841 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1842 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1843 } else a->garray = 0; 1844 1845 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1846 PetscLogObjectParent(mat,a->lvec); 1847 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1848 PetscLogObjectParent(mat,a->Mvctx); 1849 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1850 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1851 PetscLogObjectParent(mat,a->A); 1852 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1853 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1854 PetscLogObjectParent(mat,a->B); 1855 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1856 *newmat = mat; 1857 PetscFunctionReturn(0); 1858 } 1859 1860 #include "petscsys.h" 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatLoad_MPIAIJ" 1864 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1865 { 1866 Mat A; 1867 PetscScalar *vals,*svals; 1868 MPI_Comm comm = ((PetscObject)viewer)->comm; 1869 MPI_Status status; 1870 PetscErrorCode ierr; 1871 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1872 PetscInt i,nz,j,rstart,rend; 1873 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1874 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1875 PetscInt cend,cstart,n,*rowners; 1876 int fd; 1877 1878 PetscFunctionBegin; 1879 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1880 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1881 if (!rank) { 1882 ierr 
= PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1883 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1884 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1885 } 1886 1887 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1888 M = header[1]; N = header[2]; 1889 /* determine ownership of all rows */ 1890 m = M/size + ((M % size) > rank); 1891 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1892 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1893 rowners[0] = 0; 1894 for (i=2; i<=size; i++) { 1895 rowners[i] += rowners[i-1]; 1896 } 1897 rstart = rowners[rank]; 1898 rend = rowners[rank+1]; 1899 1900 /* distribute row lengths to all processors */ 1901 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1902 if (!rank) { 1903 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1904 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1905 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1906 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1907 for (j=0; j<m; j++) { 1908 procsnz[0] += ourlens[j]; 1909 } 1910 for (i=1; i<size; i++) { 1911 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1912 /* calculate the number of nonzeros on each processor */ 1913 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1914 procsnz[i] += rowlengths[j]; 1915 } 1916 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1917 } 1918 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1919 } else { 1920 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1921 } 1922 1923 if (!rank) { 1924 /* determine max buffer needed and allocate it */ 1925 maxnz = 0; 1926 for (i=0; i<size; i++) { 1927 maxnz = PetscMax(maxnz,procsnz[i]); 1928 } 1929 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1930 1931 /* read in my part of the matrix column indices */ 1932 nz = procsnz[0]; 1933 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1934 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1935 1936 /* read in every one elses and ship off */ 1937 for (i=1; i<size; i++) { 1938 nz = procsnz[i]; 1939 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1940 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1941 } 1942 ierr = PetscFree(cols);CHKERRQ(ierr); 1943 } else { 1944 /* determine buffer space needed for message */ 1945 nz = 0; 1946 for (i=0; i<m; i++) { 1947 nz += ourlens[i]; 1948 } 1949 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1950 1951 /* receive message of column indices*/ 1952 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1953 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1954 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1955 } 1956 1957 /* determine column ownership if matrix is not square */ 1958 if (N != M) { 1959 n = N/size + ((N % size) > rank); 1960 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1961 cstart = cend - n; 1962 } else { 1963 cstart = rstart; 1964 cend = rend; 1965 n = cend - cstart; 1966 } 1967 1968 /* loop over local rows, determining number of off diagonal entries */ 1969 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1970 jj = 0; 1971 for (i=0; i<m; i++) { 1972 for (j=0; j<ourlens[i]; j++) { 1973 if (mycols[jj] < cstart || 
mycols[jj] >= cend) offlens[i]++; 1974 jj++; 1975 } 1976 } 1977 1978 /* create our matrix */ 1979 for (i=0; i<m; i++) { 1980 ourlens[i] -= offlens[i]; 1981 } 1982 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 1983 ierr = MatSetType(A,type);CHKERRQ(ierr); 1984 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 1985 1986 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1987 for (i=0; i<m; i++) { 1988 ourlens[i] += offlens[i]; 1989 } 1990 1991 if (!rank) { 1992 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1993 1994 /* read in my part of the matrix numerical values */ 1995 nz = procsnz[0]; 1996 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1997 1998 /* insert into matrix */ 1999 jj = rstart; 2000 smycols = mycols; 2001 svals = vals; 2002 for (i=0; i<m; i++) { 2003 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2004 smycols += ourlens[i]; 2005 svals += ourlens[i]; 2006 jj++; 2007 } 2008 2009 /* read in other processors and ship out */ 2010 for (i=1; i<size; i++) { 2011 nz = procsnz[i]; 2012 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2013 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2014 } 2015 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2016 } else { 2017 /* receive numeric values */ 2018 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2019 2020 /* receive message of values*/ 2021 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2022 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2023 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2024 2025 /* insert into matrix */ 2026 jj = rstart; 2027 smycols = mycols; 2028 svals = vals; 2029 for (i=0; i<m; i++) { 2030 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2031 smycols += ourlens[i]; 2032 svals += ourlens[i]; 2033 jj++; 2034 } 2035 } 2036 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2037 ierr = PetscFree(vals);CHKERRQ(ierr); 2038 ierr = PetscFree(mycols);CHKERRQ(ierr); 2039 ierr = PetscFree(rowners);CHKERRQ(ierr); 2040 2041 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2042 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2043 *newmat = A; 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2049 /* 2050 Not great since it makes two copies of the submatrix, first an SeqAIJ 2051 in local and then by concatenating the local matrices the end result. 
2052 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2053 */ 2054 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2055 { 2056 PetscErrorCode ierr; 2057 PetscMPIInt rank,size; 2058 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2059 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2060 Mat *local,M,Mreuse; 2061 PetscScalar *vwork,*aa; 2062 MPI_Comm comm = mat->comm; 2063 Mat_SeqAIJ *aij; 2064 2065 2066 PetscFunctionBegin; 2067 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2068 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2069 2070 if (call == MAT_REUSE_MATRIX) { 2071 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2072 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2073 local = &Mreuse; 2074 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2075 } else { 2076 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2077 Mreuse = *local; 2078 ierr = PetscFree(local);CHKERRQ(ierr); 2079 } 2080 2081 /* 2082 m - number of local rows 2083 n - number of columns (same on all processors) 2084 rstart - first row in new global matrix generated 2085 */ 2086 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2087 if (call == MAT_INITIAL_MATRIX) { 2088 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2089 ii = aij->i; 2090 jj = aij->j; 2091 2092 /* 2093 Determine the number of non-zeros in the diagonal and off-diagonal 2094 portions of the matrix in order to do correct preallocation 2095 */ 2096 2097 /* first get start and end of "diagonal" columns */ 2098 if (csize == PETSC_DECIDE) { 2099 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2100 if (mglobal == n) { /* square matrix */ 2101 nlocal = m; 2102 } else { 2103 nlocal = n/size + ((n % size) > rank); 2104 } 2105 } else { 2106 nlocal = csize; 2107 } 2108 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2109 rstart = rend - nlocal; 2110 if (rank == size - 1 && rend != n) { 2111 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2112 } 2113 2114 /* next, compute all the lengths */ 2115 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2116 olens = dlens + m; 2117 for (i=0; i<m; i++) { 2118 jend = ii[i+1] - ii[i]; 2119 olen = 0; 2120 dlen = 0; 2121 for (j=0; j<jend; j++) { 2122 if (*jj < rstart || *jj >= rend) olen++; 2123 else dlen++; 2124 jj++; 2125 } 2126 olens[i] = olen; 2127 dlens[i] = dlen; 2128 } 2129 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2130 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2131 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2132 ierr = PetscFree(dlens);CHKERRQ(ierr); 2133 } else { 2134 PetscInt ml,nl; 2135 2136 M = *newmat; 2137 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2138 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2139 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2140 /* 2141 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2142 rather than the slower MatSetValues(). 
2143 */ 2144 M->was_assembled = PETSC_TRUE; 2145 M->assembled = PETSC_FALSE; 2146 } 2147 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2148 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2149 ii = aij->i; 2150 jj = aij->j; 2151 aa = aij->a; 2152 for (i=0; i<m; i++) { 2153 row = rstart + i; 2154 nz = ii[i+1] - ii[i]; 2155 cwork = jj; jj += nz; 2156 vwork = aa; aa += nz; 2157 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2158 } 2159 2160 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2161 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2162 *newmat = M; 2163 2164 /* save submatrix used in processor for next request */ 2165 if (call == MAT_INITIAL_MATRIX) { 2166 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2167 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2168 } 2169 2170 PetscFunctionReturn(0); 2171 } 2172 2173 EXTERN_C_BEGIN 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2176 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2177 { 2178 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2179 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2180 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2181 const PetscInt *JJ; 2182 PetscScalar *values; 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 #if defined(PETSC_OPT_g) 2187 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2188 #endif 2189 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2190 o_nnz = d_nnz + m; 2191 2192 for (i=0; i<m; i++) { 2193 nnz = I[i+1]- I[i]; 2194 JJ = J + I[i]; 2195 nnz_max = PetscMax(nnz_max,nnz); 2196 #if defined(PETSC_OPT_g) 2197 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2198 #endif 2199 for (j=0; j<nnz; j++) { 2200 if (*JJ >= cstart) break; 2201 JJ++; 2202 } 2203 d = 0; 2204 for (; j<nnz; j++) { 2205 if (*JJ++ >= cend) break; 2206 d++; 2207 } 2208 d_nnz[i] = d; 2209 o_nnz[i] = nnz - d; 2210 } 2211 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2212 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2213 2214 if (v) values = (PetscScalar*)v; 2215 else { 2216 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2217 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2218 } 2219 2220 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2221 for (i=0; i<m; i++) { 2222 ii = i + rstart; 2223 nnz = I[i+1]- I[i]; 2224 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2225 } 2226 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2227 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2228 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2229 2230 if (!v) { 2231 ierr = PetscFree(values);CHKERRQ(ierr); 2232 } 2233 PetscFunctionReturn(0); 2234 } 2235 EXTERN_C_END 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2239 /*@C 2240 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2241 (the default parallel PETSc format). 2242 2243 Collective on MPI_Comm 2244 2245 Input Parameters: 2246 + A - the matrix 2247 . i - the indices into j for the start of each local row (starts with zero) 2248 . 
j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.  See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   The submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
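
   As a concrete illustration, a minimal sketch of the usual call sequence
   follows (error checking and the MatSetValues() loop that fills the matrix
   are omitted; comm, m, n, M, N stand for whatever communicator and sizes
   the application uses, and 5 and 2 are placeholder per-row nonzero estimates):
.vb
     Mat A;
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve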

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
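
   For instance, the following sketch (error checking omitted; comm, m and n
   stand for the application's communicator and local sizes, and 5 and 2 are
   placeholder per-row nonzero estimates) supplies only the local dimensions
   and lets PETSc determine the global ones:
.vb
     Mat A;
     MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve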

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
   MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   The submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2534 In this case, the values of d_nz,o_nz are: 2535 .vb 2536 proc0 : dnz = 2, o_nz = 2 2537 proc1 : dnz = 3, o_nz = 2 2538 proc2 : dnz = 1, o_nz = 4 2539 .ve 2540 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2541 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2542 for proc3. i.e we are using 12+15+10=37 storage locations to store 2543 34 values. 2544 2545 When d_nnz, o_nnz parameters are specified, the storage is specified 2546 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2547 In the above case the values for d_nnz,o_nnz are: 2548 .vb 2549 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2550 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2551 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2552 .ve 2553 Here the space allocated is sum of all the above values i.e 34, and 2554 hence pre-allocation is perfect. 2555 2556 Level: intermediate 2557 2558 .keywords: matrix, aij, compressed row, sparse, parallel 2559 2560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2561 MPIAIJ 2562 @*/ 2563 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2564 { 2565 PetscErrorCode ierr; 2566 PetscMPIInt size; 2567 2568 PetscFunctionBegin; 2569 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2570 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2571 if (size > 1) { 2572 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2573 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2574 } else { 2575 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2576 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2577 } 2578 PetscFunctionReturn(0); 2579 } 2580 2581 #undef __FUNCT__ 2582 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2583 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2584 { 2585 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2586 2587 PetscFunctionBegin; 2588 *Ad = a->A; 2589 *Ao = a->B; 2590 *colmap = a->garray; 2591 PetscFunctionReturn(0); 2592 } 2593 2594 #undef __FUNCT__ 2595 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2596 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2597 { 2598 PetscErrorCode ierr; 2599 PetscInt i; 2600 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2601 2602 PetscFunctionBegin; 2603 if (coloring->ctype == IS_COLORING_LOCAL) { 2604 ISColoringValue *allcolors,*colors; 2605 ISColoring ocoloring; 2606 2607 /* set coloring for diagonal portion */ 2608 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2609 2610 /* set coloring for off-diagonal portion */ 2611 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2612 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2613 for (i=0; i<a->B->n; i++) { 2614 colors[i] = allcolors[a->garray[i]]; 2615 } 2616 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2617 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2618 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2619 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2620 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2621 ISColoringValue *colors; 2622 PetscInt *larray; 2623 ISColoring ocoloring; 2624 2625 /* set coloring for diagonal portion */ 2626 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2627 for (i=0; i<a->A->n; i++) { 2628 larray[i] = i + 
a->cstart; 2629 } 2630 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2631 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2632 for (i=0; i<a->A->n; i++) { 2633 colors[i] = coloring->colors[larray[i]]; 2634 } 2635 ierr = PetscFree(larray);CHKERRQ(ierr); 2636 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2637 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2638 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2639 2640 /* set coloring for off-diagonal portion */ 2641 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2642 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2643 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2644 for (i=0; i<a->B->n; i++) { 2645 colors[i] = coloring->colors[larray[i]]; 2646 } 2647 ierr = PetscFree(larray);CHKERRQ(ierr); 2648 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2649 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2650 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2651 } else { 2652 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2653 } 2654 2655 PetscFunctionReturn(0); 2656 } 2657 2658 #if defined(PETSC_HAVE_ADIC) 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2661 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2662 { 2663 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2664 PetscErrorCode ierr; 2665 2666 PetscFunctionBegin; 2667 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2668 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671 #endif 2672 2673 #undef __FUNCT__ 2674 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2675 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2676 { 2677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2678 PetscErrorCode ierr; 2679 2680 PetscFunctionBegin; 2681 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2682 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2683 PetscFunctionReturn(0); 2684 } 2685 2686 #undef __FUNCT__ 2687 #define __FUNCT__ "MatMerge" 2688 /*@C 2689 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2690 matrices from each processor 2691 2692 Collective on MPI_Comm 2693 2694 Input Parameters: 2695 + comm - the communicators the parallel matrix will live on 2696 . inmat - the input sequential matrices 2697 . n - number of local columns (or PETSC_DECIDE) 2698 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2699 2700 Output Parameter: 2701 . outmat - the parallel matrix generated 2702 2703 Level: advanced 2704 2705 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
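
   A minimal usage sketch (error checking omitted; seqA stands for a sequential
   matrix that has already been assembled on each process). As currently
   implemented, MatMerge() destroys the input matrix while building the parallel
   one, so seqA must not be destroyed again afterwards:
.vb
     Mat mpiA;
     MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);
.ve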
2706 2707 @*/ 2708 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2709 { 2710 PetscErrorCode ierr; 2711 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2712 PetscInt *indx; 2713 PetscScalar *values; 2714 PetscMap columnmap,rowmap; 2715 2716 PetscFunctionBegin; 2717 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2718 /* 2719 PetscMPIInt rank; 2720 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2721 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2722 */ 2723 if (scall == MAT_INITIAL_MATRIX){ 2724 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2725 if (n == PETSC_DECIDE){ 2726 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2727 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2728 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2729 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2730 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2731 } 2732 2733 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2734 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2735 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2736 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2737 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2738 2739 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2740 for (i=0;i<m;i++) { 2741 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2742 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2743 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2744 } 2745 /* This routine will ONLY return MPIAIJ type matrix */ 2746 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2747 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2748 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2749 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2750 2751 } else if (scall == MAT_REUSE_MATRIX){ 2752 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2753 } else { 2754 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2755 } 2756 2757 for (i=0;i<m;i++) { 2758 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2759 I = i + rstart; 2760 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2761 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2762 } 2763 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2764 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2765 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2766 2767 PetscFunctionReturn(0); 2768 } 2769 2770 #undef __FUNCT__ 2771 #define __FUNCT__ "MatFileSplit" 2772 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2773 { 2774 PetscErrorCode ierr; 2775 PetscMPIInt rank; 2776 PetscInt m,N,i,rstart,nnz; 2777 size_t len; 2778 const PetscInt *indx; 2779 PetscViewer out; 2780 char *name; 2781 Mat B; 2782 const PetscScalar *values; 2783 2784 PetscFunctionBegin; 2785 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2786 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2787 /* Should this be the type of the diagonal block of A? 
*/ 2788 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2789 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2791 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2792 for (i=0;i<m;i++) { 2793 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2794 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2795 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2796 } 2797 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2798 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2799 2800 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2801 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2802 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2803 sprintf(name,"%s.%d",outfile,rank); 2804 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2805 ierr = PetscFree(name); 2806 ierr = MatView(B,out);CHKERRQ(ierr); 2807 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2808 ierr = MatDestroy(B);CHKERRQ(ierr); 2809 PetscFunctionReturn(0); 2810 } 2811 2812 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2813 #undef __FUNCT__ 2814 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2815 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2816 { 2817 PetscErrorCode ierr; 2818 Mat_Merge_SeqsToMPI *merge; 2819 PetscObjectContainer container; 2820 2821 PetscFunctionBegin; 2822 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2823 if (container) { 2824 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2825 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2826 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2827 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2828 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2829 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2830 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2831 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2832 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2833 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2834 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2835 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2836 2837 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2838 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2839 } 2840 ierr = PetscFree(merge);CHKERRQ(ierr); 2841 2842 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2843 PetscFunctionReturn(0); 2844 } 2845 2846 #include "src/mat/utils/freespace.h" 2847 #include "petscbt.h" 2848 #undef __FUNCT__ 2849 #define __FUNCT__ "MatMerge_SeqsToMPI" 2850 /*@C 2851 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2852 matrices from each processor 2853 2854 Collective on MPI_Comm 2855 2856 Input Parameters: 2857 + comm - the communicators the parallel matrix will live on 2858 . seqmat - the input sequential matrices 2859 . m - number of local rows (or PETSC_DECIDE) 2860 . n - number of local columns (or PETSC_DECIDE) 2861 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2862 2863 Output Parameter: 2864 . mpimat - the parallel matrix generated 2865 2866 Level: advanced 2867 2868 Notes: 2869 The dimensions of the sequential matrix in each processor MUST be the same. 2870 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2871 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
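
   In this file the merge is split into a symbolic and a numeric phase,
   implemented by the two routines that follow. A minimal sketch of how they
   are intended to be combined (seqA, comm, m and n are placeholders; error
   checking omitted):
.vb
     Mat mpiA;
     MatMerge_SeqsToMPISymbolic(comm,seqA,m,n,&mpiA);
     MatMerge_SeqsToMPINumeric(seqA,mpiA);
.ve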
2872 @*/ 2873 static PetscEvent logkey_seqstompinum = 0; 2874 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2875 { 2876 PetscErrorCode ierr; 2877 MPI_Comm comm=mpimat->comm; 2878 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2879 PetscMPIInt size,rank,taga,*len_s; 2880 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2881 PetscInt proc,m; 2882 PetscInt **buf_ri,**buf_rj; 2883 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2884 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2885 MPI_Request *s_waits,*r_waits; 2886 MPI_Status *status; 2887 MatScalar *aa=a->a,**abuf_r,*ba_i; 2888 Mat_Merge_SeqsToMPI *merge; 2889 PetscObjectContainer container; 2890 2891 PetscFunctionBegin; 2892 if (!logkey_seqstompinum) { 2893 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2894 } 2895 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2896 2897 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2898 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2899 2900 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2901 if (container) { 2902 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2903 } 2904 bi = merge->bi; 2905 bj = merge->bj; 2906 buf_ri = merge->buf_ri; 2907 buf_rj = merge->buf_rj; 2908 2909 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2910 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2911 len_s = merge->len_s; 2912 2913 /* send and recv matrix values */ 2914 /*-----------------------------*/ 2915 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2916 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2917 2918 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2919 for (proc=0,k=0; proc<size; proc++){ 2920 if (!len_s[proc]) continue; 2921 i = owners[proc]; 2922 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2923 k++; 2924 } 2925 2926 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2927 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2928 ierr = PetscFree(status);CHKERRQ(ierr); 2929 2930 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2931 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2932 2933 /* insert mat values of mpimat */ 2934 /*----------------------------*/ 2935 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2936 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2937 nextrow = buf_ri_k + merge->nrecv; 2938 nextai = nextrow + merge->nrecv; 2939 2940 for (k=0; k<merge->nrecv; k++){ 2941 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2942 nrows = *(buf_ri_k[k]); 2943 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2944 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2945 } 2946 2947 /* set values of ba */ 2948 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2949 for (i=0; i<m; i++) { 2950 arow = owners[rank] + i; 2951 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2952 bnzi = bi[i+1] - bi[i]; 2953 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2954 2955 /* add local non-zero vals of this proc's seqmat into ba */ 2956 anzi = ai[arow+1] - ai[arow]; 2957 aj = a->j + ai[arow]; 2958 aa = a->a + ai[arow]; 2959 nextaj = 0; 2960 for 
(j=0; nextaj<anzi; j++){ 2961 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2962 ba_i[j] += aa[nextaj++]; 2963 } 2964 } 2965 2966 /* add received vals into ba */ 2967 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2968 /* i-th row */ 2969 if (i == *nextrow[k]) { 2970 anzi = *(nextai[k]+1) - *nextai[k]; 2971 aj = buf_rj[k] + *(nextai[k]); 2972 aa = abuf_r[k] + *(nextai[k]); 2973 nextaj = 0; 2974 for (j=0; nextaj<anzi; j++){ 2975 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2976 ba_i[j] += aa[nextaj++]; 2977 } 2978 } 2979 nextrow[k]++; nextai[k]++; 2980 } 2981 } 2982 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2983 } 2984 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2985 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2986 2987 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 2988 ierr = PetscFree(ba_i);CHKERRQ(ierr); 2989 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 2990 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2991 PetscFunctionReturn(0); 2992 } 2993 static PetscEvent logkey_seqstompisym = 0; 2994 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 2995 { 2996 PetscErrorCode ierr; 2997 Mat B_mpi; 2998 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2999 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3000 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3001 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3002 PetscInt len,proc,*dnz,*onz; 3003 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3004 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3005 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3006 MPI_Status *status; 3007 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3008 PetscBT lnkbt; 3009 Mat_Merge_SeqsToMPI *merge; 3010 PetscObjectContainer container; 3011 3012 PetscFunctionBegin; 3013 if (!logkey_seqstompisym) { 3014 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3015 } 3016 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3017 3018 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3019 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3020 3021 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3022 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3023 3024 /* determine row ownership */ 3025 /*---------------------------------------------------------*/ 3026 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3027 if (m == PETSC_DECIDE) { 3028 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3029 } else { 3030 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3031 } 3032 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3033 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3034 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3035 3036 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3037 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3038 3039 /* determine the number of messages to send, their lengths */ 3040 /*---------------------------------------------------------*/ 3041 len_s = merge->len_s; 3042 3043 len = 0; /* length of buf_si[] */ 3044 merge->nsend = 0; 3045 for (proc=0; proc<size; proc++){ 3046 len_si[proc] = 0; 3047 if (proc == rank){ 3048 len_s[proc] = 0; 3049 } else { 3050 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3051 
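      /* note: len_s[proc] set below counts the column indices (nonzeros) to be
         sent to [proc], computed from the row pointer array ai[], not the rows */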
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3052 } 3053 if (len_s[proc]) { 3054 merge->nsend++; 3055 nrows = 0; 3056 for (i=owners[proc]; i<owners[proc+1]; i++){ 3057 if (ai[i+1] > ai[i]) nrows++; 3058 } 3059 len_si[proc] = 2*(nrows+1); 3060 len += len_si[proc]; 3061 } 3062 } 3063 3064 /* determine the number and length of messages to receive for ij-structure */ 3065 /*-------------------------------------------------------------------------*/ 3066 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3067 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3068 3069 /* post the Irecv of j-structure */ 3070 /*-------------------------------*/ 3071 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3072 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3073 3074 /* post the Isend of j-structure */ 3075 /*--------------------------------*/ 3076 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3077 sj_waits = si_waits + merge->nsend; 3078 3079 for (proc=0, k=0; proc<size; proc++){ 3080 if (!len_s[proc]) continue; 3081 i = owners[proc]; 3082 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3083 k++; 3084 } 3085 3086 /* receives and sends of j-structure are complete */ 3087 /*------------------------------------------------*/ 3088 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3089 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3090 3091 /* send and recv i-structure */ 3092 /*---------------------------*/ 3093 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3094 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3095 3096 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3097 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3098 for (proc=0,k=0; proc<size; proc++){ 3099 if (!len_s[proc]) continue; 3100 /* form outgoing message for i-structure: 3101 buf_si[0]: nrows to be sent 3102 [1:nrows]: row index (global) 3103 [nrows+1:2*nrows+1]: i-structure index 3104 */ 3105 /*-------------------------------------------*/ 3106 nrows = len_si[proc]/2 - 1; 3107 buf_si_i = buf_si + nrows+1; 3108 buf_si[0] = nrows; 3109 buf_si_i[0] = 0; 3110 nrows = 0; 3111 for (i=owners[proc]; i<owners[proc+1]; i++){ 3112 anzi = ai[i+1] - ai[i]; 3113 if (anzi) { 3114 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3115 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3116 nrows++; 3117 } 3118 } 3119 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3120 k++; 3121 buf_si += len_si[proc]; 3122 } 3123 3124 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3125 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3126 3127 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3128 for (i=0; i<merge->nrecv; i++){ 3129 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3130 } 3131 3132 ierr = PetscFree(len_si);CHKERRQ(ierr); 3133 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3134 ierr = 
PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  if (n==PETSC_DECIDE) {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3221 3222 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3223 B_mpi->assembled = PETSC_FALSE; 3224 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3225 merge->bi = bi; 3226 merge->bj = bj; 3227 merge->buf_ri = buf_ri; 3228 merge->buf_rj = buf_rj; 3229 merge->coi = PETSC_NULL; 3230 merge->coj = PETSC_NULL; 3231 merge->owners_co = PETSC_NULL; 3232 3233 /* attach the supporting struct to B_mpi for reuse */ 3234 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3235 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3236 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3237 *mpimat = B_mpi; 3238 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3239 PetscFunctionReturn(0); 3240 } 3241 3242 static PetscEvent logkey_seqstompi = 0; 3243 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3244 { 3245 PetscErrorCode ierr; 3246 3247 PetscFunctionBegin; 3248 if (!logkey_seqstompi) { 3249 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3250 } 3251 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3252 if (scall == MAT_INITIAL_MATRIX){ 3253 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3254 } 3255 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3256 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3257 PetscFunctionReturn(0); 3258 } 3259 static PetscEvent logkey_getlocalmat = 0; 3260 #undef __FUNCT__ 3261 #define __FUNCT__ "MatGetLocalMat" 3262 /*@C 3263 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3264 3265 Not Collective 3266 3267 Input Parameters: 3268 + A - the matrix 3269 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3270 3271 Output Parameter: 3272 . 
A_loc - the local sequential matrix generated 3273 3274 Level: developer 3275 3276 @*/ 3277 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3278 { 3279 PetscErrorCode ierr; 3280 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3281 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3282 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3283 PetscScalar *aa=a->a,*ba=b->a,*ca; 3284 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3285 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3286 3287 PetscFunctionBegin; 3288 if (!logkey_getlocalmat) { 3289 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3290 } 3291 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3292 if (scall == MAT_INITIAL_MATRIX){ 3293 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3294 ci[0] = 0; 3295 for (i=0; i<am; i++){ 3296 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3297 } 3298 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3299 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3300 k = 0; 3301 for (i=0; i<am; i++) { 3302 ncols_o = bi[i+1] - bi[i]; 3303 ncols_d = ai[i+1] - ai[i]; 3304 /* off-diagonal portion of A */ 3305 for (jo=0; jo<ncols_o; jo++) { 3306 col = cmap[*bj]; 3307 if (col >= cstart) break; 3308 cj[k] = col; bj++; 3309 ca[k++] = *ba++; 3310 } 3311 /* diagonal portion of A */ 3312 for (j=0; j<ncols_d; j++) { 3313 cj[k] = cstart + *aj++; 3314 ca[k++] = *aa++; 3315 } 3316 /* off-diagonal portion of A */ 3317 for (j=jo; j<ncols_o; j++) { 3318 cj[k] = cmap[*bj++]; 3319 ca[k++] = *ba++; 3320 } 3321 } 3322 /* put together the new matrix */ 3323 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3324 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3325 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3326 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3327 mat->freedata = PETSC_TRUE; 3328 mat->nonew = 0; 3329 } else if (scall == MAT_REUSE_MATRIX){ 3330 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3331 ci = mat->i; cj = mat->j; ca = mat->a; 3332 for (i=0; i<am; i++) { 3333 /* off-diagonal portion of A */ 3334 ncols_o = bi[i+1] - bi[i]; 3335 for (jo=0; jo<ncols_o; jo++) { 3336 col = cmap[*bj]; 3337 if (col >= cstart) break; 3338 *ca++ = *ba++; bj++; 3339 } 3340 /* diagonal portion of A */ 3341 ncols_d = ai[i+1] - ai[i]; 3342 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3343 /* off-diagonal portion of A */ 3344 for (j=jo; j<ncols_o; j++) { 3345 *ca++ = *ba++; bj++; 3346 } 3347 } 3348 } else { 3349 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3350 } 3351 3352 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3353 PetscFunctionReturn(0); 3354 } 3355 3356 static PetscEvent logkey_getlocalmatcondensed = 0; 3357 #undef __FUNCT__ 3358 #define __FUNCT__ "MatGetLocalMatCondensed" 3359 /*@C 3360 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3361 3362 Not Collective 3363 3364 Input Parameters: 3365 + A - the matrix 3366 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3367 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3368 3369 Output Parameter: 3370 . 
A_loc - the local sequential matrix generated 3371 3372 Level: developer 3373 3374 @*/ 3375 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3376 { 3377 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3378 PetscErrorCode ierr; 3379 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3380 IS isrowa,iscola; 3381 Mat *aloc; 3382 3383 PetscFunctionBegin; 3384 if (!logkey_getlocalmatcondensed) { 3385 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3386 } 3387 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3388 if (!row){ 3389 start = a->rstart; end = a->rend; 3390 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3391 } else { 3392 isrowa = *row; 3393 } 3394 if (!col){ 3395 start = a->cstart; 3396 cmap = a->garray; 3397 nzA = a->A->n; 3398 nzB = a->B->n; 3399 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3400 ncols = 0; 3401 for (i=0; i<nzB; i++) { 3402 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3403 else break; 3404 } 3405 imark = i; 3406 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3407 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3408 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3409 ierr = PetscFree(idx);CHKERRQ(ierr); 3410 } else { 3411 iscola = *col; 3412 } 3413 if (scall != MAT_INITIAL_MATRIX){ 3414 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3415 aloc[0] = *A_loc; 3416 } 3417 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3418 *A_loc = aloc[0]; 3419 ierr = PetscFree(aloc);CHKERRQ(ierr); 3420 if (!row){ 3421 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3422 } 3423 if (!col){ 3424 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3425 } 3426 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3427 PetscFunctionReturn(0); 3428 } 3429 3430 static PetscEvent logkey_GetBrowsOfAcols = 0; 3431 #undef __FUNCT__ 3432 #define __FUNCT__ "MatGetBrowsOfAcols" 3433 /*@C 3434 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3435 3436 Collective on Mat 3437 3438 Input Parameters: 3439 + A,B - the matrices in mpiaij format 3440 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3441 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3442 3443 Output Parameter: 3444 + rowb, colb - index sets of rows and columns of B to extract 3445 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3446 - B_seq - the sequential matrix generated 3447 3448 Level: developer 3449 3450 @*/ 3451 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3452 { 3453 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3454 PetscErrorCode ierr; 3455 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3456 IS isrowb,iscolb; 3457 Mat *bseq; 3458 3459 PetscFunctionBegin; 3460 if (a->cstart != b->rstart || a->cend != b->rend){ 3461 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3462 } 3463 if (!logkey_GetBrowsOfAcols) { 3464 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3465 } 3466 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3467 3468 if (scall == MAT_INITIAL_MATRIX){ 3469 start = a->cstart; 3470 cmap = a->garray; 3471 nzA = a->A->n; 3472 nzB = a->B->n; 3473 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3474 ncols = 0; 3475 for (i=0; i<nzB; i++) { /* row < local row index */ 3476 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3477 else break; 3478 } 3479 imark = i; 3480 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3481 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3482 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3483 ierr = PetscFree(idx);CHKERRQ(ierr); 3484 *brstart = imark; 3485 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3486 } else { 3487 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3488 isrowb = *rowb; iscolb = *colb; 3489 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3490 bseq[0] = *B_seq; 3491 } 3492 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3493 *B_seq = bseq[0]; 3494 ierr = PetscFree(bseq);CHKERRQ(ierr); 3495 if (!rowb){ 3496 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3497 } else { 3498 *rowb = isrowb; 3499 } 3500 if (!colb){ 3501 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3502 } else { 3503 *colb = iscolb; 3504 } 3505 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3506 PetscFunctionReturn(0); 3507 } 3508 3509 static PetscEvent logkey_GetBrowsOfAocols = 0; 3510 #undef __FUNCT__ 3511 #define __FUNCT__ "MatGetBrowsOfAoCols" 3512 /*@C 3513 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3514 of the OFF-DIAGONAL portion of local A 3515 3516 Collective on Mat 3517 3518 Input Parameters: 3519 + A,B - the matrices in mpiaij format 3520 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3521 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3522 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3523 3524 Output Parameter: 3525 + B_oth - the sequential matrix generated 3526 3527 Level: developer 3528 3529 @*/ 3530 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3531 { 3532 VecScatter_MPI_General *gen_to,*gen_from; 3533 PetscErrorCode ierr; 3534 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3535 Mat_SeqAIJ *b_oth; 3536 VecScatter ctx=a->Mvctx; 3537 MPI_Comm comm=ctx->comm; 3538 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3539 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3540 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3541 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3542 MPI_Request *rwaits,*swaits; 3543 MPI_Status *sstatus,rstatus; 3544 PetscInt *cols; 3545 PetscScalar *vals; 3546 PetscMPIInt j; 3547 3548 PetscFunctionBegin; 3549 if (a->cstart != b->rstart || a->cend != b->rend){ 3550 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3551 } 3552 if (!logkey_GetBrowsOfAocols) { 3553 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3554 } 3555 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3556 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3557 3558 gen_to = (VecScatter_MPI_General*)ctx->todata; 3559 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3560 rvalues = gen_from->values; /* holds the length of sending row */ 3561 svalues = gen_to->values; /* holds the length of receiving row */ 3562 nrecvs = gen_from->n; 3563 nsends = gen_to->n; 3564 rwaits = gen_from->requests; 3565 swaits = gen_to->requests; 3566 srow = gen_to->indices; /* local row index to be sent */ 3567 rstarts = gen_from->starts; 3568 sstarts = gen_to->starts; 3569 rprocs = gen_from->procs; 3570 sprocs = gen_to->procs; 3571 sstatus = gen_to->sstatus; 3572 3573 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3574 if (scall == MAT_INITIAL_MATRIX){ 3575 /* i-array */ 3576 /*---------*/ 3577 /* post receives */ 3578 for (i=0; i<nrecvs; i++){ 3579 rowlen = (PetscInt*)rvalues + rstarts[i]; 3580 nrows = rstarts[i+1]-rstarts[i]; 3581 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3582 } 3583 3584 /* pack the outgoing message */ 3585 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3586 rstartsj = sstartsj + nsends +1; 3587 sstartsj[0] = 0; rstartsj[0] = 0; 3588 len = 0; /* total length of j or a array to be sent */ 3589 k = 0; 3590 for (i=0; i<nsends; i++){ 3591 rowlen = (PetscInt*)svalues + sstarts[i]; 3592 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3593 for (j=0; j<nrows; j++) { 3594 row = srow[k] + b->rowners[rank]; /* global row idx */ 3595 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3596 len += rowlen[j]; 3597 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3598 k++; 3599 } 3600 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3601 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3602 } 3603 /* recvs and sends of i-array are completed */ 3604 i = nrecvs; 3605 while (i--) { 3606 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3607 } 3608 if (nsends) { 3609 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3610 } 3611 /* allocate buffers for sending j and a arrays */ 3612 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3613 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3614 3615 /* create i-array of B_oth */ 3616 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3617 b_othi[0] = 0; 3618 len = 0; /* total length of j or a array to be received */ 3619 k = 0; 3620 for (i=0; i<nrecvs; i++){ 3621 rowlen = (PetscInt*)rvalues + rstarts[i]; 3622 nrows = rstarts[i+1]-rstarts[i]; 3623 for (j=0; j<nrows; j++) { 3624 b_othi[k+1] = b_othi[k] + rowlen[j]; 3625 len += rowlen[j]; k++; 3626 } 3627 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3628 } 3629 3630 /* allocate space for j and a arrrays of B_oth */ 3631 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3632 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3633 3634 /* j-array */ 3635 /*---------*/ 3636 /* post receives of j-array */ 3637 for (i=0; i<nrecvs; i++){ 3638 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3639 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3640 } 3641 k = 0; 3642 for (i=0; i<nsends; i++){ 3643 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3644 bufJ = bufj+sstartsj[i]; 3645 for (j=0; j<nrows; j++) { 3646 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3647 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3648 for (l=0; l<ncols; l++){ 3649 *bufJ++ = cols[l]; 3650 } 3651 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3652 } 3653 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3654 } 3655 3656 /* recvs and sends of j-array are completed */ 3657 i = nrecvs; 3658 while (i--) { 3659 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3660 } 3661 if (nsends) { 3662 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3663 } 3664 } else if (scall == MAT_REUSE_MATRIX){ 3665 sstartsj = *startsj; 3666 rstartsj = sstartsj + nsends +1; 3667 bufa = *bufa_ptr; 3668 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3669 b_otha = b_oth->a; 3670 } else { 3671 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3672 } 3673 3674 /* a-array */ 3675 /*---------*/ 3676 /* post receives of a-array */ 3677 for (i=0; i<nrecvs; i++){ 3678 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3679 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3680 } 3681 k = 0; 3682 for (i=0; i<nsends; i++){ 3683 nrows = sstarts[i+1]-sstarts[i]; 3684 bufA = bufa+sstartsj[i]; 3685 for (j=0; j<nrows; j++) { 3686 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3687 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3688 for (l=0; l<ncols; l++){ 3689 *bufA++ = vals[l]; 3690 } 3691 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3692 3693 } 3694 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3695 } 3696 /* recvs and sends of a-array are completed */ 3697 i = nrecvs; 3698 while (i--) { 3699 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3700 } 3701 if (nsends) { 
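    /* ensure all outgoing a-array messages have completed before the send buffer bufa is reclaimed or returned */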
3702 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3703 } 3704 3705 if (scall == MAT_INITIAL_MATRIX){ 3706 /* put together the new matrix */ 3707 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3708 3709 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3710 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3711 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3712 b_oth->freedata = PETSC_TRUE; 3713 b_oth->nonew = 0; 3714 3715 ierr = PetscFree(bufj);CHKERRQ(ierr); 3716 if (!startsj || !bufa_ptr){ 3717 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3718 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3719 } else { 3720 *startsj = sstartsj; 3721 *bufa_ptr = bufa; 3722 } 3723 } 3724 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3725 3726 PetscFunctionReturn(0); 3727 } 3728 3729 /*MC 3730 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3731 3732 Options Database Keys: 3733 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3734 3735 Level: beginner 3736 3737 .seealso: MatCreateMPIAIJ 3738 M*/ 3739 3740 EXTERN_C_BEGIN 3741 #undef __FUNCT__ 3742 #define __FUNCT__ "MatCreate_MPIAIJ" 3743 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3744 { 3745 Mat_MPIAIJ *b; 3746 PetscErrorCode ierr; 3747 PetscInt i; 3748 PetscMPIInt size; 3749 3750 PetscFunctionBegin; 3751 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3752 3753 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3754 B->data = (void*)b; 3755 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3756 B->factor = 0; 3757 B->bs = 1; 3758 B->assembled = PETSC_FALSE; 3759 B->mapping = 0; 3760 3761 B->insertmode = NOT_SET_VALUES; 3762 b->size = size; 3763 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3764 3765 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3766 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3767 3768 /* the information in the maps duplicates the information computed below, eventually 3769 we should remove the duplicate information that is not contained in the maps */ 3770 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3771 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3772 3773 /* build local table of row and column ownerships */ 3774 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3775 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3776 b->cowners = b->rowners + b->size + 2; 3777 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3778 b->rowners[0] = 0; 3779 for (i=2; i<=b->size; i++) { 3780 b->rowners[i] += b->rowners[i-1]; 3781 } 3782 b->rstart = b->rowners[b->rank]; 3783 b->rend = b->rowners[b->rank+1]; 3784 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3785 b->cowners[0] = 0; 3786 for (i=2; i<=b->size; i++) { 3787 b->cowners[i] += b->cowners[i-1]; 3788 } 3789 b->cstart = b->cowners[b->rank]; 3790 b->cend = b->cowners[b->rank+1]; 3791 3792 /* build cache for off array entries formed */ 3793 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3794 b->donotstash = PETSC_FALSE; 3795 b->colmap = 0; 3796 b->garray = 0; 3797 b->roworiented = PETSC_TRUE; 3798 3799 /* stuff used for matrix vector multiply */ 3800 b->lvec = PETSC_NULL; 3801 b->Mvctx = PETSC_NULL; 
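  /* lvec and Mvctx are set up later, during assembly, once the off-diagonal column pattern is known */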
3802 3803 /* stuff for MatGetRow() */ 3804 b->rowindices = 0; 3805 b->rowvalues = 0; 3806 b->getrowactive = PETSC_FALSE; 3807 3808 /* Explicitly create 2 MATSEQAIJ matrices. */ 3809 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3810 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3811 PetscLogObjectParent(B,b->A); 3812 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3813 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3814 PetscLogObjectParent(B,b->B); 3815 3816 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3817 "MatStoreValues_MPIAIJ", 3818 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3820 "MatRetrieveValues_MPIAIJ", 3821 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3822 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3823 "MatGetDiagonalBlock_MPIAIJ", 3824 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3826 "MatIsTranspose_MPIAIJ", 3827 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3829 "MatMPIAIJSetPreallocation_MPIAIJ", 3830 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3831 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3832 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3833 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3835 "MatDiagonalScaleLocal_MPIAIJ", 3836 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3837 PetscFunctionReturn(0); 3838 } 3839 EXTERN_C_END 3840
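/*
   A minimal usage sketch for MatMerge_SeqsToMPI(): each process builds its own
   SeqAIJ matrix and the pieces are merged into one parallel MPIAIJ matrix.  The
   first call uses MAT_INITIAL_MATRIX to build the symbolic structure; later calls
   can reuse it with MAT_REUSE_MATRIX when only the numerical values of seqmat
   change.  The variable names below (seqmat, C) are placeholders, not part of the
   library source.

      Mat            seqmat,C;
      PetscErrorCode ierr;

      ... assemble the local SeqAIJ matrix seqmat on each process ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);

      ... update the numerical values of seqmat, keeping its nonzero pattern ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

      ierr = MatDestroy(C);CHKERRQ(ierr);
*/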