#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
   at a slightly higher hash-table cost; without it, it is not scalable
   (each processor has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
    if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (PetscInt*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
    for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
    ierr = PetscFree(a->a);CHKERRQ(ierr); \
    if (!a->singlemalloc) { \
      ierr = PetscFree(a->i);CHKERRQ(ierr); \
      ierr = PetscFree(a->j);CHKERRQ(ierr); \
    } \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = PETSC_TRUE; \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
    PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
    a->maxnz += CHUNKSIZE; \
    a->reallocs++; \
  } \
  N = nrow++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow; \
}
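/*
   Note (added commentary, not part of the original PETSc source): the
   MatSetValues_SeqAIJ_A_Private/_B_Private macros share one strategy: bisect the
   sorted column indices of the row down to a small window, scan that window
   linearly for the column, and, if the row is already full, reallocate the whole
   CSR arrays with CHUNKSIZE extra slots for that row.  A minimal standalone
   sketch of the search step, assuming cols[0..n-1] is sorted ascending, is:

       PetscInt low = 0, high = n, t, i, pos = -1;
       while (high - low > 5) {
         t = (low + high) / 2;
         if (cols[t] > target) high = t;
         else                  low  = t;
       }
       for (i = low; i < high; i++) {
         if (cols[i] == target) { pos = i; break; }
         if (cols[i] >  target) break;
       }

   If pos is still -1 the new entry is inserted at position i; the macros do the
   same thing directly on the Mat_SeqAIJ arrays (ai, aj, aa) set up by
   MatSetValues_MPIAIJ().
*/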
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
  rmax = bimax[row]; nrow = bilen[row]; \
  col1 = col - shift; \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
  if (nonew == 1) goto b_noinsert; \
  else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
    if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (PetscInt*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
    for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
    ierr = PetscFree(b->a);CHKERRQ(ierr); \
    if (!b->singlemalloc) { \
      ierr = PetscFree(b->i);CHKERRQ(ierr); \
      ierr = PetscFree(b->j);CHKERRQ(ierr); \
    } \
    ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
    b->singlemalloc = PETSC_TRUE; \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
    PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
    b->maxnz += CHUNKSIZE; \
    b->reallocs++; \
  } \
  N = nrow++ - 1; b->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow; \
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  PetscInt    *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt    nonew = a->nonew,shift = 0;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
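/*
   Usage sketch (added commentary, not part of the original PETSc source): callers
   always pass global row/column indices to MatSetValues(); the routine above splits
   each entry between the diagonal block aij->A and the off-diagonal block aij->B, and
   stashes rows owned by other processes until assembly.  A typical caller, assuming a
   matrix A created with MatCreateMPIAIJ() and a local row range [rstart,rend) from
   MatGetOwnershipRange(), might look like:

       PetscInt    row, cols[3];
       PetscScalar vals[3] = {-1.0, 2.0, -1.0};
       for (row = rstart; row < rend; row++) {
         cols[0] = row-1; cols[1] = row; cols[2] = row+1;
         ierr = MatSetValues(A, 1, &row, 3, cols, vals, INSERT_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   (The first and last global rows would need the cols array trimmed; omitted here
   for brevity.)
*/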
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* re-access b because aij->B was changed */
      b = (Mat_SeqAIJ *)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }

  b->inode.use         = PETSC_FALSE;
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr          = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, cause lots of mallocs and slow
     things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually inserted
     in yy until the next line, which is true for this implementation but may not
     always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
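/*
   Note (added commentary, not part of the original PETSc source): MatMult, MatMultAdd
   and MatMultTranspose above all follow the same overlap pattern: start the scatter of
   the needed off-process vector entries into a->lvec, apply the local diagonal block
   while the messages are in flight, finish the scatter, then apply the off-diagonal
   block.  From the caller's side this is invisible; a sketch of a conforming
   matrix-vector product might be:

       Vec      x, y;
       PetscInt m, n;
       ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
       ierr = VecCreateMPI(PETSC_COMM_WORLD, n, PETSC_DECIDE, &x);CHKERRQ(ierr);
       ierr = VecCreateMPI(PETSC_COMM_WORLD, m, PETSC_DECIDE, &y);CHKERRQ(ierr);
       ierr = MatMult(A, x, y);CHKERRQ(ierr);
*/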
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually inserted
     in zz until the next line, which is true for this implementation but may not
     always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      if (!a->inode.size) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
      if (a->inode.size) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
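/*
   Usage sketch (added commentary, not part of the original PETSc source): the viewer
   formats handled above are chosen by the caller before viewing.  Assuming the
   PetscViewerSetFormat() interface of this PETSc version, the per-process summary
   printed by the PETSC_VIEWER_ASCII_INFO_DETAIL branch could be requested with:

       ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   Binary output goes through MatView_MPIAIJ_Binary() above, which funnels all data to
   process 0 for writing.
*/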
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
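/*
   Note (added commentary, not part of the original PETSc source): the "parallel"
   relaxation above is really processor-local SOR: on each outer iteration the current
   solution is scattered into mat->lvec, the right-hand side is corrected as
   bb1 = bb - B*lvec (the VecScale by -1 followed by the multadd), and an ordinary
   sequential SOR sweep is applied to the diagonal block only.  Assuming the MatRelax()
   interface of this PETSc version, one local symmetric sweep starting from a zero
   guess might be requested as:

       ierr = MatRelax(mat, b, 1.0, SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,
                       0.0, 1, 1, x);CHKERRQ(ierr);
*/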
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
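/*
   Usage sketch (added commentary, not part of the original PETSc source): options are
   forwarded to the diagonal and off-diagonal sequential blocks as appropriate.  Two
   common calls with this two-argument MatSetOption() interface:

       Drop any entries destined for other processes instead of stashing them:
         ierr = MatSetOption(A, MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
       Freeze the nonzero pattern once the preallocation is known to be exact:
         ierr = MatSetOption(A, MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
*/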
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
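/*
   Usage sketch (added commentary, not part of the original PETSc source): MatGetRow()
   above merges the diagonal and off-diagonal parts of a locally owned row into one
   sorted list of global column indices; each MatGetRow() must be paired with a
   MatRestoreRow().  For example:

       PetscInt    row, rstart, rend, ncols;
       PetscInt    *cols;
       PetscScalar *vals;
       ierr = MatGetOwnershipRange(mat, &rstart, &rend);CHKERRQ(ierr);
       for (row = rstart; row < rend; row++) {
         ierr = MatGetRow(mat, row, &ncols, &cols, &vals);CHKERRQ(ierr);
         ... use ncols, cols[], vals[] ...
         ierr = MatRestoreRow(mat, row, &ncols, &cols, &vals);CHKERRQ(ierr);
       }
*/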
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
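/*
   Note (added commentary, not part of the original PETSc source): MatEqual above is
   collective; each process compares its own diagonal and off-diagonal blocks and the
   per-process results are combined with a logical-AND reduction, so every rank
   receives the same answer:

       PetscTruth eq;
       ierr = MatEqual(A, B, &eq);CHKERRQ(ierr);
       if (eq) { ... the two parallel matrices are identical on every process ... }
*/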
PetscFunctionReturn(0); 1539 } 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatCopy_MPIAIJ" 1543 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1544 { 1545 PetscErrorCode ierr; 1546 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1547 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1548 1549 PetscFunctionBegin; 1550 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1551 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1552 /* because of the column compression in the off-processor part of the matrix a->B, 1553 the number of columns in a->B and b->B may be different, hence we cannot call 1554 the MatCopy() directly on the two parts. If need be, we can provide a more 1555 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1556 then copying the submatrices */ 1557 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1558 } else { 1559 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1560 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1567 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1568 { 1569 PetscErrorCode ierr; 1570 1571 PetscFunctionBegin; 1572 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #include "petscblaslapack.h" 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatAXPY_MPIAIJ" 1579 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1580 { 1581 PetscErrorCode ierr; 1582 PetscInt i; 1583 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1584 PetscBLASInt bnz,one=1; 1585 Mat_SeqAIJ *x,*y; 1586 1587 PetscFunctionBegin; 1588 if (str == SAME_NONZERO_PATTERN) { 1589 x = (Mat_SeqAIJ *)xx->A->data; 1590 y = (Mat_SeqAIJ *)yy->A->data; 1591 bnz = (PetscBLASInt)x->nz; 1592 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1593 x = (Mat_SeqAIJ *)xx->B->data; 1594 y = (Mat_SeqAIJ *)yy->B->data; 1595 bnz = (PetscBLASInt)x->nz; 1596 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1597 } else if (str == SUBSET_NONZERO_PATTERN) { 1598 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1599 1600 x = (Mat_SeqAIJ *)xx->B->data; 1601 y = (Mat_SeqAIJ *)yy->B->data; 1602 if (y->xtoy && y->XtoY != xx->B) { 1603 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1604 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1605 } 1606 if (!y->xtoy) { /* get xtoy */ 1607 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1608 y->XtoY = xx->B; 1609 } 1610 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1611 } else { 1612 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1613 } 1614 PetscFunctionReturn(0); 1615 } 1616 1617 /* -------------------------------------------------------------------*/ 1618 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1619 MatGetRow_MPIAIJ, 1620 MatRestoreRow_MPIAIJ, 1621 MatMult_MPIAIJ, 1622 /* 4*/ MatMultAdd_MPIAIJ, 1623 MatMultTranspose_MPIAIJ, 1624 MatMultTransposeAdd_MPIAIJ, 1625 0, 1626 0, 1627 0, 1628 /*10*/ 0, 1629 0, 1630 0, 1631 MatRelax_MPIAIJ, 1632 MatTranspose_MPIAIJ, 1633 /*15*/ MatGetInfo_MPIAIJ, 1634 MatEqual_MPIAIJ, 1635 MatGetDiagonal_MPIAIJ, 1636 MatDiagonalScale_MPIAIJ, 1637 MatNorm_MPIAIJ, 1638 /*20*/ MatAssemblyBegin_MPIAIJ, 1639 MatAssemblyEnd_MPIAIJ, 1640 0, 1641 MatSetOption_MPIAIJ, 1642 MatZeroEntries_MPIAIJ, 1643 /*25*/ MatZeroRows_MPIAIJ, 1644 0, 
1645 0, 1646 0, 1647 0, 1648 /*30*/ MatSetUpPreallocation_MPIAIJ, 1649 0, 1650 0, 1651 0, 1652 0, 1653 /*35*/ MatDuplicate_MPIAIJ, 1654 0, 1655 0, 1656 0, 1657 0, 1658 /*40*/ MatAXPY_MPIAIJ, 1659 MatGetSubMatrices_MPIAIJ, 1660 MatIncreaseOverlap_MPIAIJ, 1661 MatGetValues_MPIAIJ, 1662 MatCopy_MPIAIJ, 1663 /*45*/ MatPrintHelp_MPIAIJ, 1664 MatScale_MPIAIJ, 1665 0, 1666 0, 1667 0, 1668 /*50*/ MatSetBlockSize_MPIAIJ, 1669 0, 1670 0, 1671 0, 1672 0, 1673 /*55*/ MatFDColoringCreate_MPIAIJ, 1674 0, 1675 MatSetUnfactored_MPIAIJ, 1676 0, 1677 0, 1678 /*60*/ MatGetSubMatrix_MPIAIJ, 1679 MatDestroy_MPIAIJ, 1680 MatView_MPIAIJ, 1681 MatGetPetscMaps_Petsc, 1682 0, 1683 /*65*/ 0, 1684 0, 1685 0, 1686 0, 1687 0, 1688 /*70*/ 0, 1689 0, 1690 MatSetColoring_MPIAIJ, 1691 #if defined(PETSC_HAVE_ADIC) 1692 MatSetValuesAdic_MPIAIJ, 1693 #else 1694 0, 1695 #endif 1696 MatSetValuesAdifor_MPIAIJ, 1697 /*75*/ 0, 1698 0, 1699 0, 1700 0, 1701 0, 1702 /*80*/ 0, 1703 0, 1704 0, 1705 0, 1706 /*84*/ MatLoad_MPIAIJ, 1707 0, 1708 0, 1709 0, 1710 0, 1711 0, 1712 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1713 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1714 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1715 MatPtAP_MPIAIJ_MPIAIJ, 1716 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1717 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1718 0, 1719 0, 1720 0}; 1721 1722 /* ----------------------------------------------------------------------------------------*/ 1723 1724 EXTERN_C_BEGIN 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1727 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1728 { 1729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1734 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 EXTERN_C_END 1738 1739 EXTERN_C_BEGIN 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1742 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1743 { 1744 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1745 PetscErrorCode ierr; 1746 1747 PetscFunctionBegin; 1748 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1749 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1750 PetscFunctionReturn(0); 1751 } 1752 EXTERN_C_END 1753 1754 #include "petscpc.h" 1755 EXTERN_C_BEGIN 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1758 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1759 { 1760 Mat_MPIAIJ *b; 1761 PetscErrorCode ierr; 1762 PetscInt i; 1763 1764 PetscFunctionBegin; 1765 B->preallocated = PETSC_TRUE; 1766 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1767 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1768 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1769 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1770 if (d_nnz) { 1771 for (i=0; i<B->m; i++) { 1772 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1773 } 1774 } 1775 if (o_nnz) { 1776 for (i=0; i<B->m; i++) { 1777 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1778 } 1779 } 1780 b = (Mat_MPIAIJ*)B->data; 1781 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1782 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1783 1784 PetscFunctionReturn(0); 1785 } 1786 EXTERN_C_END 1787 
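/*
   Illustrative usage sketch (not part of the original source): the private routine
   above is normally reached through the public interface once the matrix type has
   been set, e.g.

      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Passing PETSC_DEFAULT (or PETSC_DECIDE) for d_nz/o_nz falls back to the defaults
   of 5 and 2 chosen above; negative counts are rejected with PETSC_ERR_ARG_OUTOFRANGE.
*/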
1788 #undef __FUNCT__ 1789 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1790 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1791 { 1792 Mat mat; 1793 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1794 PetscErrorCode ierr; 1795 1796 PetscFunctionBegin; 1797 *newmat = 0; 1798 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1799 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1800 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1801 a = (Mat_MPIAIJ*)mat->data; 1802 1803 mat->factor = matin->factor; 1804 mat->bs = matin->bs; 1805 mat->assembled = PETSC_TRUE; 1806 mat->insertmode = NOT_SET_VALUES; 1807 mat->preallocated = PETSC_TRUE; 1808 1809 a->rstart = oldmat->rstart; 1810 a->rend = oldmat->rend; 1811 a->cstart = oldmat->cstart; 1812 a->cend = oldmat->cend; 1813 a->size = oldmat->size; 1814 a->rank = oldmat->rank; 1815 a->donotstash = oldmat->donotstash; 1816 a->roworiented = oldmat->roworiented; 1817 a->rowindices = 0; 1818 a->rowvalues = 0; 1819 a->getrowactive = PETSC_FALSE; 1820 1821 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1822 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1823 if (oldmat->colmap) { 1824 #if defined (PETSC_USE_CTABLE) 1825 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1826 #else 1827 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1828 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1829 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1830 #endif 1831 } else a->colmap = 0; 1832 if (oldmat->garray) { 1833 PetscInt len; 1834 len = oldmat->B->n; 1835 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1836 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1837 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1838 } else a->garray = 0; 1839 1840 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1841 PetscLogObjectParent(mat,a->lvec); 1842 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1843 PetscLogObjectParent(mat,a->Mvctx); 1844 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1845 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1846 PetscLogObjectParent(mat,a->A); 1847 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1848 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1849 PetscLogObjectParent(mat,a->B); 1850 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1851 *newmat = mat; 1852 PetscFunctionReturn(0); 1853 } 1854 1855 #include "petscsys.h" 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatLoad_MPIAIJ" 1859 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1860 { 1861 Mat A; 1862 PetscScalar *vals,*svals; 1863 MPI_Comm comm = ((PetscObject)viewer)->comm; 1864 MPI_Status status; 1865 PetscErrorCode ierr; 1866 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1867 PetscInt i,nz,j,rstart,rend; 1868 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1869 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1870 PetscInt cend,cstart,n,*rowners; 1871 int fd; 1872 1873 PetscFunctionBegin; 1874 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1875 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1876 if (!rank) { 1877 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1878 ierr = PetscBinaryRead(fd,(char 
*)header,4,PETSC_INT);CHKERRQ(ierr); 1879 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1880 } 1881 1882 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1883 M = header[1]; N = header[2]; 1884 /* determine ownership of all rows */ 1885 m = M/size + ((M % size) > rank); 1886 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1887 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1888 rowners[0] = 0; 1889 for (i=2; i<=size; i++) { 1890 rowners[i] += rowners[i-1]; 1891 } 1892 rstart = rowners[rank]; 1893 rend = rowners[rank+1]; 1894 1895 /* distribute row lengths to all processors */ 1896 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1897 if (!rank) { 1898 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1899 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1900 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1901 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1902 for (j=0; j<m; j++) { 1903 procsnz[0] += ourlens[j]; 1904 } 1905 for (i=1; i<size; i++) { 1906 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1907 /* calculate the number of nonzeros on each processor */ 1908 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1909 procsnz[i] += rowlengths[j]; 1910 } 1911 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1912 } 1913 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1914 } else { 1915 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1916 } 1917 1918 if (!rank) { 1919 /* determine max buffer needed and allocate it */ 1920 maxnz = 0; 1921 for (i=0; i<size; i++) { 1922 maxnz = PetscMax(maxnz,procsnz[i]); 1923 } 1924 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1925 1926 /* read in my part of the matrix column indices */ 1927 nz = procsnz[0]; 1928 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1929 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1930 1931 /* read in every one elses and ship off */ 1932 for (i=1; i<size; i++) { 1933 nz = procsnz[i]; 1934 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1935 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1936 } 1937 ierr = PetscFree(cols);CHKERRQ(ierr); 1938 } else { 1939 /* determine buffer space needed for message */ 1940 nz = 0; 1941 for (i=0; i<m; i++) { 1942 nz += ourlens[i]; 1943 } 1944 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1945 1946 /* receive message of column indices*/ 1947 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1948 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1949 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1950 } 1951 1952 /* determine column ownership if matrix is not square */ 1953 if (N != M) { 1954 n = N/size + ((N % size) > rank); 1955 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1956 cstart = cend - n; 1957 } else { 1958 cstart = rstart; 1959 cend = rend; 1960 n = cend - cstart; 1961 } 1962 1963 /* loop over local rows, determining number of off diagonal entries */ 1964 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1965 jj = 0; 1966 for (i=0; i<m; i++) { 1967 for (j=0; j<ourlens[i]; j++) { 1968 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1969 jj++; 1970 } 1971 } 1972 1973 /* create our matrix */ 1974 for 
(i=0; i<m; i++) { 1975 ourlens[i] -= offlens[i]; 1976 } 1977 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 1978 ierr = MatSetType(A,type);CHKERRQ(ierr); 1979 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 1980 1981 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1982 for (i=0; i<m; i++) { 1983 ourlens[i] += offlens[i]; 1984 } 1985 1986 if (!rank) { 1987 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1988 1989 /* read in my part of the matrix numerical values */ 1990 nz = procsnz[0]; 1991 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1992 1993 /* insert into matrix */ 1994 jj = rstart; 1995 smycols = mycols; 1996 svals = vals; 1997 for (i=0; i<m; i++) { 1998 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1999 smycols += ourlens[i]; 2000 svals += ourlens[i]; 2001 jj++; 2002 } 2003 2004 /* read in other processors and ship out */ 2005 for (i=1; i<size; i++) { 2006 nz = procsnz[i]; 2007 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2008 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2009 } 2010 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2011 } else { 2012 /* receive numeric values */ 2013 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2014 2015 /* receive message of values*/ 2016 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2017 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2018 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2019 2020 /* insert into matrix */ 2021 jj = rstart; 2022 smycols = mycols; 2023 svals = vals; 2024 for (i=0; i<m; i++) { 2025 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2026 smycols += ourlens[i]; 2027 svals += ourlens[i]; 2028 jj++; 2029 } 2030 } 2031 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2032 ierr = PetscFree(vals);CHKERRQ(ierr); 2033 ierr = PetscFree(mycols);CHKERRQ(ierr); 2034 ierr = PetscFree(rowners);CHKERRQ(ierr); 2035 2036 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2037 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2038 *newmat = A; 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2044 /* 2045 Not great since it makes two copies of the submatrix, first an SeqAIJ 2046 in local and then by concatenating the local matrices the end result. 
2047 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2048 */ 2049 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2050 { 2051 PetscErrorCode ierr; 2052 PetscMPIInt rank,size; 2053 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2054 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2055 Mat *local,M,Mreuse; 2056 PetscScalar *vwork,*aa; 2057 MPI_Comm comm = mat->comm; 2058 Mat_SeqAIJ *aij; 2059 2060 2061 PetscFunctionBegin; 2062 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2063 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2064 2065 if (call == MAT_REUSE_MATRIX) { 2066 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2067 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2068 local = &Mreuse; 2069 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2070 } else { 2071 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2072 Mreuse = *local; 2073 ierr = PetscFree(local);CHKERRQ(ierr); 2074 } 2075 2076 /* 2077 m - number of local rows 2078 n - number of columns (same on all processors) 2079 rstart - first row in new global matrix generated 2080 */ 2081 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2082 if (call == MAT_INITIAL_MATRIX) { 2083 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2084 ii = aij->i; 2085 jj = aij->j; 2086 2087 /* 2088 Determine the number of non-zeros in the diagonal and off-diagonal 2089 portions of the matrix in order to do correct preallocation 2090 */ 2091 2092 /* first get start and end of "diagonal" columns */ 2093 if (csize == PETSC_DECIDE) { 2094 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2095 if (mglobal == n) { /* square matrix */ 2096 nlocal = m; 2097 } else { 2098 nlocal = n/size + ((n % size) > rank); 2099 } 2100 } else { 2101 nlocal = csize; 2102 } 2103 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2104 rstart = rend - nlocal; 2105 if (rank == size - 1 && rend != n) { 2106 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2107 } 2108 2109 /* next, compute all the lengths */ 2110 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2111 olens = dlens + m; 2112 for (i=0; i<m; i++) { 2113 jend = ii[i+1] - ii[i]; 2114 olen = 0; 2115 dlen = 0; 2116 for (j=0; j<jend; j++) { 2117 if (*jj < rstart || *jj >= rend) olen++; 2118 else dlen++; 2119 jj++; 2120 } 2121 olens[i] = olen; 2122 dlens[i] = dlen; 2123 } 2124 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2125 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2126 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2127 ierr = PetscFree(dlens);CHKERRQ(ierr); 2128 } else { 2129 PetscInt ml,nl; 2130 2131 M = *newmat; 2132 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2133 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2134 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2135 /* 2136 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2137 rather than the slower MatSetValues(). 
2138 */ 2139 M->was_assembled = PETSC_TRUE; 2140 M->assembled = PETSC_FALSE; 2141 } 2142 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2143 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2144 ii = aij->i; 2145 jj = aij->j; 2146 aa = aij->a; 2147 for (i=0; i<m; i++) { 2148 row = rstart + i; 2149 nz = ii[i+1] - ii[i]; 2150 cwork = jj; jj += nz; 2151 vwork = aa; aa += nz; 2152 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2153 } 2154 2155 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 *newmat = M; 2158 2159 /* save submatrix used in processor for next request */ 2160 if (call == MAT_INITIAL_MATRIX) { 2161 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2162 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2163 } 2164 2165 PetscFunctionReturn(0); 2166 } 2167 2168 EXTERN_C_BEGIN 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2171 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2172 { 2173 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2174 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2175 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2176 const PetscInt *JJ; 2177 PetscScalar *values; 2178 PetscErrorCode ierr; 2179 2180 PetscFunctionBegin; 2181 #if defined(PETSC_OPT_g) 2182 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2183 #endif 2184 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2185 o_nnz = d_nnz + m; 2186 2187 for (i=0; i<m; i++) { 2188 nnz = I[i+1]- I[i]; 2189 JJ = J + I[i]; 2190 nnz_max = PetscMax(nnz_max,nnz); 2191 #if defined(PETSC_OPT_g) 2192 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2193 #endif 2194 for (j=0; j<nnz; j++) { 2195 if (*JJ >= cstart) break; 2196 JJ++; 2197 } 2198 d = 0; 2199 for (; j<nnz; j++) { 2200 if (*JJ++ >= cend) break; 2201 d++; 2202 } 2203 d_nnz[i] = d; 2204 o_nnz[i] = nnz - d; 2205 } 2206 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2207 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2208 2209 if (v) values = (PetscScalar*)v; 2210 else { 2211 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2212 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2213 } 2214 2215 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2216 for (i=0; i<m; i++) { 2217 ii = i + rstart; 2218 nnz = I[i+1]- I[i]; 2219 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2220 } 2221 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2222 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2223 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2224 2225 if (!v) { 2226 ierr = PetscFree(values);CHKERRQ(ierr); 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 EXTERN_C_END 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2234 /*@C 2235 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2236 (the default parallel PETSc format). 2237 2238 Collective on MPI_Comm 2239 2240 Input Parameters: 2241 + A - the matrix 2242 . i - the indices into j for the start of each local row (starts with zero) 2243 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2244 - v - optional values in the matrix 2245 2246 Level: developer 2247 2248 .keywords: matrix, aij, compressed row, sparse, parallel 2249 2250 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2251 @*/ 2252 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2253 { 2254 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2255 2256 PetscFunctionBegin; 2257 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2258 if (f) { 2259 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2260 } 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2266 /*@C 2267 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2268 (the default parallel PETSc format). For good matrix assembly performance 2269 the user should preallocate the matrix storage by setting the parameters 2270 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2271 performance can be increased by more than a factor of 50. 2272 2273 Collective on MPI_Comm 2274 2275 Input Parameters: 2276 + A - the matrix 2277 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2278 (same value is used for all local rows) 2279 . d_nnz - array containing the number of nonzeros in the various rows of the 2280 DIAGONAL portion of the local submatrix (possibly different for each row) 2281 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2282 The size of this array is equal to the number of local rows, i.e 'm'. 2283 You must leave room for the diagonal entry even if it is zero. 2284 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2285 submatrix (same value is used for all local rows). 2286 - o_nnz - array containing the number of nonzeros in the various rows of the 2287 OFF-DIAGONAL portion of the local submatrix (possibly different for 2288 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2289 structure. The size of this array is equal to the number 2290 of local rows, i.e 'm'. 2291 2292 If the *_nnz parameter is given then the *_nz parameter is ignored 2293 2294 The AIJ format (also called the Yale sparse matrix format or 2295 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2296 storage. The stored row and column indices begin with zero. See the users manual for details. 2297 2298 The parallel matrix is partitioned such that the first m0 rows belong to 2299 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2300 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2301 2302 The DIAGONAL portion of the local submatrix of a processor can be defined 2303 as the submatrix which is obtained by extraction the part corresponding 2304 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2305 first row that belongs to the processor, and r2 is the last row belonging 2306 to the this processor. This is a square mxm matrix. The remaining portion 2307 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2308 2309 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
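   A typical call sequence (a sketch only; error checking omitted) is
.vb
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MPIAIJ);
      MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);   /* or (A,0,d_nnz,0,o_nnz) */
.ve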
2310
2311    Example usage:
2312
2313    Consider the following 8x8 matrix with 34 non-zero values, that is
2314    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
2315    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2316    as follows:
2317
2318 .vb
2319             1  2  0  |  0  3  0  |  0  4
2320    Proc0    0  5  6  |  7  0  0  |  8  0
2321             9  0 10  | 11  0  0  | 12  0
2322    -------------------------------------
2323            13  0 14  | 15 16 17  |  0  0
2324    Proc1    0 18  0  | 19 20 21  |  0  0
2325             0  0  0  | 22 23  0  | 24  0
2326    -------------------------------------
2327    Proc2   25 26 27  |  0  0 28  | 29  0
2328            30  0  0  | 31 32 33  |  0 34
2329 .ve
2330
2331    This can be represented as a collection of submatrices as:
2332
2333 .vb
2334       A B C
2335       D E F
2336       G H I
2337 .ve
2338
2339    Where the submatrices A,B,C are owned by proc0, D,E,F are
2340    owned by proc1, G,H,I are owned by proc2.
2341
2342    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2343    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2344    The 'M','N' parameters are 8,8, and have the same values on all procs.
2345
2346    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2347    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2348    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2349    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2350    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2351    matrix, and [DF] as another SeqAIJ matrix.
2352
2353    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2354    allocated for every row of the local diagonal submatrix, and o_nz
2355    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2356    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2357    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2358    In this case, the values of d_nz,o_nz are:
2359 .vb
2360      proc0 : d_nz = 2, o_nz = 2
2361      proc1 : d_nz = 3, o_nz = 2
2362      proc2 : d_nz = 1, o_nz = 4
2363 .ve
2364    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2365    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2366    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2367    34 values.
2368
2369    When the d_nnz, o_nnz parameters are specified, the storage is specified
2370    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2371    In the above case the values for d_nnz,o_nnz are:
2372 .vb
2373      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2374      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2375      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2376 .ve
2377    Here the space allocated is the sum of all the above values, i.e. 34, and
2378    hence the preallocation is perfect.
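   For instance, with the per-row counts above, proc1 would call (a sketch; the
   other processes pass their own arrays)
.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve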
2379 2380 Level: intermediate 2381 2382 .keywords: matrix, aij, compressed row, sparse, parallel 2383 2384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2385 MPIAIJ 2386 @*/ 2387 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2388 { 2389 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2390 2391 PetscFunctionBegin; 2392 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2393 if (f) { 2394 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatCreateMPIAIJ" 2401 /*@C 2402 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2403 (the default parallel PETSc format). For good matrix assembly performance 2404 the user should preallocate the matrix storage by setting the parameters 2405 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2406 performance can be increased by more than a factor of 50. 2407 2408 Collective on MPI_Comm 2409 2410 Input Parameters: 2411 + comm - MPI communicator 2412 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2413 This value should be the same as the local size used in creating the 2414 y vector for the matrix-vector product y = Ax. 2415 . n - This value should be the same as the local size used in creating the 2416 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2417 calculated if N is given) For square matrices n is almost always m. 2418 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2419 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2420 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2421 (same value is used for all local rows) 2422 . d_nnz - array containing the number of nonzeros in the various rows of the 2423 DIAGONAL portion of the local submatrix (possibly different for each row) 2424 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2425 The size of this array is equal to the number of local rows, i.e 'm'. 2426 You must leave room for the diagonal entry even if it is zero. 2427 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2428 submatrix (same value is used for all local rows). 2429 - o_nnz - array containing the number of nonzeros in the various rows of the 2430 OFF-DIAGONAL portion of the local submatrix (possibly different for 2431 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2432 structure. The size of this array is equal to the number 2433 of local rows, i.e 'm'. 2434 2435 Output Parameter: 2436 . A - the matrix 2437 2438 Notes: 2439 If the *_nnz parameter is given then the *_nz parameter is ignored 2440 2441 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2442 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2443 storage requirements for this matrix. 2444 2445 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2446 processor than it must be used on all processors that share the object for 2447 that argument. 2448 2449 The user MUST specify either the local or global matrix dimensions 2450 (possibly both). 
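   For example, a matrix distributed by rows with PETSc choosing the local sizes
   could be created with (a sketch; error checking omitted)
.vb
      MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve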
2451 2452 The parallel matrix is partitioned such that the first m0 rows belong to 2453 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2454 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2455 2456 The DIAGONAL portion of the local submatrix of a processor can be defined 2457 as the submatrix which is obtained by extraction the part corresponding 2458 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2459 first row that belongs to the processor, and r2 is the last row belonging 2460 to the this processor. This is a square mxm matrix. The remaining portion 2461 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2462 2463 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2464 2465 When calling this routine with a single process communicator, a matrix of 2466 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2467 type of communicator, use the construction mechanism: 2468 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2469 2470 By default, this format uses inodes (identical nodes) when possible. 2471 We search for consecutive rows with the same nonzero structure, thereby 2472 reusing matrix information to achieve increased efficiency. 2473 2474 Options Database Keys: 2475 + -mat_aij_no_inode - Do not use inodes 2476 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2477 - -mat_aij_oneindex - Internally use indexing starting at 1 2478 rather than 0. Note that when calling MatSetValues(), 2479 the user still MUST index entries starting at 0! 2480 2481 2482 Example usage: 2483 2484 Consider the following 8x8 matrix with 34 non-zero values, that is 2485 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2486 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2487 as follows: 2488 2489 .vb 2490 1 2 0 | 0 3 0 | 0 4 2491 Proc0 0 5 6 | 7 0 0 | 8 0 2492 9 0 10 | 11 0 0 | 12 0 2493 ------------------------------------- 2494 13 0 14 | 15 16 17 | 0 0 2495 Proc1 0 18 0 | 19 20 21 | 0 0 2496 0 0 0 | 22 23 0 | 24 0 2497 ------------------------------------- 2498 Proc2 25 26 27 | 0 0 28 | 29 0 2499 30 0 0 | 31 32 33 | 0 34 2500 .ve 2501 2502 This can be represented as a collection of submatrices as: 2503 2504 .vb 2505 A B C 2506 D E F 2507 G H I 2508 .ve 2509 2510 Where the submatrices A,B,C are owned by proc0, D,E,F are 2511 owned by proc1, G,H,I are owned by proc2. 2512 2513 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2514 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2515 The 'M','N' parameters are 8,8, and have the same values on all procs. 2516 2517 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2518 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2519 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2520 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2521 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2522 matrix, ans [DF] as another SeqAIJ matrix. 2523 2524 When d_nz, o_nz parameters are specified, d_nz storage elements are 2525 allocated for every row of the local diagonal submatrix, and o_nz 2526 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2527 One way to choose d_nz and o_nz is to use the max nonzerors per local 2528 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2529 In this case, the values of d_nz,o_nz are: 2530 .vb 2531 proc0 : dnz = 2, o_nz = 2 2532 proc1 : dnz = 3, o_nz = 2 2533 proc2 : dnz = 1, o_nz = 4 2534 .ve 2535 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2536 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2537 for proc3. i.e we are using 12+15+10=37 storage locations to store 2538 34 values. 2539 2540 When d_nnz, o_nnz parameters are specified, the storage is specified 2541 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2542 In the above case the values for d_nnz,o_nnz are: 2543 .vb 2544 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2545 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2546 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2547 .ve 2548 Here the space allocated is sum of all the above values i.e 34, and 2549 hence pre-allocation is perfect. 2550 2551 Level: intermediate 2552 2553 .keywords: matrix, aij, compressed row, sparse, parallel 2554 2555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2556 MPIAIJ 2557 @*/ 2558 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2559 { 2560 PetscErrorCode ierr; 2561 PetscMPIInt size; 2562 2563 PetscFunctionBegin; 2564 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2565 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2566 if (size > 1) { 2567 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2568 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2569 } else { 2570 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2571 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2572 } 2573 PetscFunctionReturn(0); 2574 } 2575 2576 #undef __FUNCT__ 2577 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2578 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2579 { 2580 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2581 2582 PetscFunctionBegin; 2583 *Ad = a->A; 2584 *Ao = a->B; 2585 *colmap = a->garray; 2586 PetscFunctionReturn(0); 2587 } 2588 2589 #undef __FUNCT__ 2590 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2591 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2592 { 2593 PetscErrorCode ierr; 2594 PetscInt i; 2595 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2596 2597 PetscFunctionBegin; 2598 if (coloring->ctype == IS_COLORING_LOCAL) { 2599 ISColoringValue *allcolors,*colors; 2600 ISColoring ocoloring; 2601 2602 /* set coloring for diagonal portion */ 2603 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2604 2605 /* set coloring for off-diagonal portion */ 2606 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2607 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2608 for (i=0; i<a->B->n; i++) { 2609 colors[i] = allcolors[a->garray[i]]; 2610 } 2611 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2612 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2613 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2614 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2615 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2616 ISColoringValue *colors; 2617 PetscInt *larray; 2618 ISColoring ocoloring; 2619 2620 /* set coloring for diagonal portion */ 2621 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2622 for (i=0; i<a->A->n; i++) { 2623 larray[i] = i + 
a->cstart;
2624     }
2625     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2626     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2627     for (i=0; i<a->A->n; i++) {
2628       colors[i] = coloring->colors[larray[i]];
2629     }
2630     ierr = PetscFree(larray);CHKERRQ(ierr);
2631     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2632     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2633     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2634
2635     /* set coloring for off-diagonal portion */
2636     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2637     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2638     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2639     for (i=0; i<a->B->n; i++) {
2640       colors[i] = coloring->colors[larray[i]];
2641     }
2642     ierr = PetscFree(larray);CHKERRQ(ierr);
2643     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2644     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2645     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2646   } else {
2647     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2648   }
2649
2650   PetscFunctionReturn(0);
2651 }
2652
2653 #if defined(PETSC_HAVE_ADIC)
2654 #undef __FUNCT__
2655 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2656 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2657 {
2658   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2659   PetscErrorCode ierr;
2660
2661   PetscFunctionBegin;
2662   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2663   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2664   PetscFunctionReturn(0);
2665 }
2666 #endif
2667
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2670 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2671 {
2672   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2673   PetscErrorCode ierr;
2674
2675   PetscFunctionBegin;
2676   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2677   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatMerge"
2683 /*@C
2684       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2685                  matrices from each processor
2686
2687    Collective on MPI_Comm
2688
2689    Input Parameters:
2690 +    comm - the communicator the parallel matrix will live on
2691 .    inmat - the input sequential matrix on this process
2692 .    n - number of local columns (or PETSC_DECIDE)
2693 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2694
2695    Output Parameter:
2696 .    outmat - the parallel matrix generated
2697
2698    Level: advanced
2699
2700    Notes: The number of columns of the matrix in EACH processor MUST be the same.
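   Example usage (a sketch; each process contributes its own sequential matrix and
   error checking is omitted):
.vb
      Mat inmat,outmat;
      /* ... assemble the local sequential matrix inmat ... */
      MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve
   Note that inmat is destroyed by this routine as part of building outmat.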
2701 2702 @*/ 2703 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2704 { 2705 PetscErrorCode ierr; 2706 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2707 PetscInt *indx; 2708 PetscScalar *values; 2709 PetscMap columnmap,rowmap; 2710 2711 PetscFunctionBegin; 2712 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2713 /* 2714 PetscMPIInt rank; 2715 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2716 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2717 */ 2718 if (scall == MAT_INITIAL_MATRIX){ 2719 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2720 if (n == PETSC_DECIDE){ 2721 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2722 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2723 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2724 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2725 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2726 } 2727 2728 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2729 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2730 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2731 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2732 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2733 2734 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2735 for (i=0;i<m;i++) { 2736 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2737 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2738 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2739 } 2740 /* This routine will ONLY return MPIAIJ type matrix */ 2741 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2742 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2743 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2744 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2745 2746 } else if (scall == MAT_REUSE_MATRIX){ 2747 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2748 } else { 2749 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2750 } 2751 2752 for (i=0;i<m;i++) { 2753 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2754 I = i + rstart; 2755 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2756 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2757 } 2758 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2759 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2761 2762 PetscFunctionReturn(0); 2763 } 2764 2765 #undef __FUNCT__ 2766 #define __FUNCT__ "MatFileSplit" 2767 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2768 { 2769 PetscErrorCode ierr; 2770 PetscMPIInt rank; 2771 PetscInt m,N,i,rstart,nnz; 2772 size_t len; 2773 const PetscInt *indx; 2774 PetscViewer out; 2775 char *name; 2776 Mat B; 2777 const PetscScalar *values; 2778 2779 PetscFunctionBegin; 2780 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2781 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2782 /* Should this be the type of the diagonal block of A? 
*/
2783   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2784   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2785   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2786   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2787   for (i=0;i<m;i++) {
2788     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2789     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2790     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2791   }
2792   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2793   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2794
2795   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2796   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2797   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2798   sprintf(name,"%s.%d",outfile,rank);
2799   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2800   ierr = PetscFree(name);CHKERRQ(ierr);
2801   ierr = MatView(B,out);CHKERRQ(ierr);
2802   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2803   ierr = MatDestroy(B);CHKERRQ(ierr);
2804   PetscFunctionReturn(0);
2805 }
2806
2807 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2810 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2811 {
2812   PetscErrorCode       ierr;
2813   Mat_Merge_SeqsToMPI  *merge;
2814   PetscObjectContainer container;
2815
2816   PetscFunctionBegin;
2817   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2818   if (container) {
2819     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2820     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2821     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2822     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2823     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2824     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2825     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2826     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2827     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2828     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2829     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2830     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2831
2832     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2833     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2834   }
2835   if (container) {ierr = PetscFree(merge);CHKERRQ(ierr);} /* merge is only valid when the container was found */
2836
2837   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2838   PetscFunctionReturn(0);
2839 }
2840
2841 #include "src/mat/utils/freespace.h"
2842 #include "petscbt.h"
2843 #undef __FUNCT__
2844 #define __FUNCT__ "MatMerge_SeqsToMPI"
2845 /*@C
2846       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2847                            matrices from each processor
2848
2849    Collective on MPI_Comm
2850
2851    Input Parameters:
2852 +    comm - the communicator the parallel matrix will live on
2853 .    seqmat - the input sequential matrix on this process
2854 .    m - number of local rows (or PETSC_DECIDE)
2855 .    n - number of local columns (or PETSC_DECIDE)
2856 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2857
2858    Output Parameter:
2859 .    mpimat - the parallel matrix generated
2860
2861    Level: advanced
2862
2863    Notes:
2864      The dimensions of the sequential matrix in each processor MUST be the same.
2865      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2866      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
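   The two phases implemented below can also be driven separately, for example
   (a sketch; error checking omitted):
.vb
      MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
      MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve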
2867 @*/ 2868 static PetscEvent logkey_seqstompinum = 0; 2869 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2870 { 2871 PetscErrorCode ierr; 2872 MPI_Comm comm=mpimat->comm; 2873 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2874 PetscMPIInt size,rank,taga,*len_s; 2875 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2876 PetscInt proc,m; 2877 PetscInt **buf_ri,**buf_rj; 2878 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2879 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2880 MPI_Request *s_waits,*r_waits; 2881 MPI_Status *status; 2882 MatScalar *aa=a->a,**abuf_r,*ba_i; 2883 Mat_Merge_SeqsToMPI *merge; 2884 PetscObjectContainer container; 2885 2886 PetscFunctionBegin; 2887 if (!logkey_seqstompinum) { 2888 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2889 } 2890 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2891 2892 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2893 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2894 2895 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2896 if (container) { 2897 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2898 } 2899 bi = merge->bi; 2900 bj = merge->bj; 2901 buf_ri = merge->buf_ri; 2902 buf_rj = merge->buf_rj; 2903 2904 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2905 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2906 len_s = merge->len_s; 2907 2908 /* send and recv matrix values */ 2909 /*-----------------------------*/ 2910 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2911 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2912 2913 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2914 for (proc=0,k=0; proc<size; proc++){ 2915 if (!len_s[proc]) continue; 2916 i = owners[proc]; 2917 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2918 k++; 2919 } 2920 2921 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2922 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2923 ierr = PetscFree(status);CHKERRQ(ierr); 2924 2925 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2926 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2927 2928 /* insert mat values of mpimat */ 2929 /*----------------------------*/ 2930 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2931 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2932 nextrow = buf_ri_k + merge->nrecv; 2933 nextai = nextrow + merge->nrecv; 2934 2935 for (k=0; k<merge->nrecv; k++){ 2936 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2937 nrows = *(buf_ri_k[k]); 2938 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2939 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2940 } 2941 2942 /* set values of ba */ 2943 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2944 for (i=0; i<m; i++) { 2945 arow = owners[rank] + i; 2946 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2947 bnzi = bi[i+1] - bi[i]; 2948 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2949 2950 /* add local non-zero vals of this proc's seqmat into ba */ 2951 anzi = ai[arow+1] - ai[arow]; 2952 aj = a->j + ai[arow]; 2953 aa = a->a + ai[arow]; 2954 nextaj = 0; 2955 for 
(j=0; nextaj<anzi; j++){ 2956 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2957 ba_i[j] += aa[nextaj++]; 2958 } 2959 } 2960 2961 /* add received vals into ba */ 2962 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2963 /* i-th row */ 2964 if (i == *nextrow[k]) { 2965 anzi = *(nextai[k]+1) - *nextai[k]; 2966 aj = buf_rj[k] + *(nextai[k]); 2967 aa = abuf_r[k] + *(nextai[k]); 2968 nextaj = 0; 2969 for (j=0; nextaj<anzi; j++){ 2970 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2971 ba_i[j] += aa[nextaj++]; 2972 } 2973 } 2974 nextrow[k]++; nextai[k]++; 2975 } 2976 } 2977 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2978 } 2979 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2980 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2981 2982 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 2983 ierr = PetscFree(ba_i);CHKERRQ(ierr); 2984 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 2985 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2986 PetscFunctionReturn(0); 2987 } 2988 static PetscEvent logkey_seqstompisym = 0; 2989 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 2990 { 2991 PetscErrorCode ierr; 2992 Mat B_mpi; 2993 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2994 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 2995 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 2996 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 2997 PetscInt len,proc,*dnz,*onz; 2998 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 2999 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3000 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3001 MPI_Status *status; 3002 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3003 PetscBT lnkbt; 3004 Mat_Merge_SeqsToMPI *merge; 3005 PetscObjectContainer container; 3006 3007 PetscFunctionBegin; 3008 if (!logkey_seqstompisym) { 3009 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3010 } 3011 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3012 3013 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3014 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3015 3016 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3017 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3018 3019 /* determine row ownership */ 3020 /*---------------------------------------------------------*/ 3021 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3022 if (m == PETSC_DECIDE) { 3023 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3024 } else { 3025 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3026 } 3027 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3028 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3029 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3030 3031 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3032 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3033 3034 /* determine the number of messages to send, their lengths */ 3035 /*---------------------------------------------------------*/ 3036 len_s = merge->len_s; 3037 3038 len = 0; /* length of buf_si[] */ 3039 merge->nsend = 0; 3040 for (proc=0; proc<size; proc++){ 3041 len_si[proc] = 0; 3042 if (proc == rank){ 3043 len_s[proc] = 0; 3044 } else { 3045 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3046 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3047 } 3048 if (len_s[proc]) { 3049 merge->nsend++; 3050 nrows = 0; 3051 for (i=owners[proc]; i<owners[proc+1]; i++){ 3052 if (ai[i+1] > ai[i]) nrows++; 3053 } 3054 len_si[proc] = 2*(nrows+1); 3055 len += len_si[proc]; 3056 } 3057 } 3058 3059 /* determine the number and length of messages to receive for ij-structure */ 3060 /*-------------------------------------------------------------------------*/ 3061 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3062 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3063 3064 /* post the Irecv of j-structure */ 3065 /*-------------------------------*/ 3066 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3067 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3068 3069 /* post the Isend of j-structure */ 3070 /*--------------------------------*/ 3071 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3072 sj_waits = si_waits + merge->nsend; 3073 3074 for (proc=0, k=0; proc<size; proc++){ 3075 if (!len_s[proc]) continue; 3076 i = owners[proc]; 3077 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3078 k++; 3079 } 3080 3081 /* receives and sends of j-structure are complete */ 3082 /*------------------------------------------------*/ 3083 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3084 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3085 3086 /* send and recv i-structure */ 3087 /*---------------------------*/ 3088 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3089 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3090 3091 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3092 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3093 for (proc=0,k=0; proc<size; proc++){ 3094 if (!len_s[proc]) continue; 3095 /* form outgoing message for i-structure: 3096 buf_si[0]: nrows to be sent 3097 [1:nrows]: row index (global) 3098 [nrows+1:2*nrows+1]: i-structure index 3099 */ 3100 /*-------------------------------------------*/ 3101 nrows = len_si[proc]/2 - 1; 3102 buf_si_i = buf_si + nrows+1; 3103 buf_si[0] = nrows; 3104 buf_si_i[0] = 0; 3105 nrows = 0; 3106 for (i=owners[proc]; i<owners[proc+1]; i++){ 3107 anzi = ai[i+1] - ai[i]; 3108 if (anzi) { 3109 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3110 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3111 nrows++; 3112 } 3113 } 3114 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3115 k++; 3116 buf_si += len_si[proc]; 3117 } 3118 3119 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3120 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3121 3122 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3123 for (i=0; i<merge->nrecv; i++){ 3124 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3125 } 3126 3127 ierr = PetscFree(len_si);CHKERRQ(ierr); 3128 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3129 ierr = 
PetscFree(rj_waits);CHKERRQ(ierr); 3130 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3131 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3132 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3133 ierr = PetscFree(status);CHKERRQ(ierr); 3134 3135 /* compute a local seq matrix in each processor */ 3136 /*----------------------------------------------*/ 3137 /* allocate bi array and free space for accumulating nonzero column info */ 3138 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3139 bi[0] = 0; 3140 3141 /* create and initialize a linked list */ 3142 nlnk = N+1; 3143 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3144 3145 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3146 len = 0; 3147 len = ai[owners[rank+1]] - ai[owners[rank]]; 3148 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3149 current_space = free_space; 3150 3151 /* determine symbolic info for each local row */ 3152 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3153 nextrow = buf_ri_k + merge->nrecv; 3154 nextai = nextrow + merge->nrecv; 3155 for (k=0; k<merge->nrecv; k++){ 3156 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3157 nrows = *buf_ri_k[k]; 3158 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 3159 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (row offsets) of the k-th received message */ 3160 } 3161 3162 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3163 len = 0; 3164 for (i=0;i<m;i++) { 3165 bnzi = 0; 3166 /* add local non-zero cols of this proc's seqmat into lnk */ 3167 arow = owners[rank] + i; 3168 anzi = ai[arow+1] - ai[arow]; 3169 aj = a->j + ai[arow]; 3170 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3171 bnzi += nlnk; 3172 /* add received col data into lnk */ 3173 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3174 if (i == *nextrow[k]) { /* i-th row */ 3175 anzi = *(nextai[k]+1) - *nextai[k]; 3176 aj = buf_rj[k] + *nextai[k]; 3177 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3178 bnzi += nlnk; 3179 nextrow[k]++; nextai[k]++; 3180 } 3181 } 3182 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3183 3184 /* if free space is not available, make more free space */ 3185 if (current_space->local_remaining<bnzi) { 3186 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3187 nspacedouble++; 3188 } 3189 /* copy data into free space, then initialize lnk */ 3190 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3191 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3192 3193 current_space->array += bnzi; 3194 current_space->local_used += bnzi; 3195 current_space->local_remaining -= bnzi; 3196 3197 bi[i+1] = bi[i] + bnzi; 3198 } 3199 3200 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3201 3202 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3203 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3204 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3205 3206 /* create symbolic parallel matrix B_mpi */ 3207 /*---------------------------------------*/ 3208 if (n==PETSC_DECIDE) { 3209 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3210 } else { 3211 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3212 } 3213 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3214 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3215 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3216 3217 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3218 B_mpi->assembled = PETSC_FALSE; 3219 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3220 merge->bi = bi; 3221 merge->bj = bj; 3222 merge->buf_ri = buf_ri; 3223 merge->buf_rj = buf_rj; 3224 merge->coi = PETSC_NULL; 3225 merge->coj = PETSC_NULL; 3226 merge->owners_co = PETSC_NULL; 3227 3228 /* attach the supporting struct to B_mpi for reuse */ 3229 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3230 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3231 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3232 *mpimat = B_mpi; 3233 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3234 PetscFunctionReturn(0); 3235 } 3236 3237 static PetscEvent logkey_seqstompi = 0; 3238 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3239 { 3240 PetscErrorCode ierr; 3241 3242 PetscFunctionBegin; 3243 if (!logkey_seqstompi) { 3244 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3245 } 3246 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3247 if (scall == MAT_INITIAL_MATRIX){ 3248 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3249 } 3250 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3251 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3252 PetscFunctionReturn(0); 3253 } 3254 static PetscEvent logkey_getlocalmat = 0; 3255 #undef __FUNCT__ 3256 #define __FUNCT__ "MatGetLocalMat" 3257 /*@C 3258 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3259 3260 Not Collective 3261 3262 Input Parameters: 3263 + A - the matrix 3264 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3265 3266 Output Parameter: 3267 . 
A_loc - the local sequential matrix generated 3268 3269 Level: developer 3270 3271 @*/ 3272 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3273 { 3274 PetscErrorCode ierr; 3275 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3276 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3277 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3278 PetscScalar *aa=a->a,*ba=b->a,*ca; 3279 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3280 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3281 3282 PetscFunctionBegin; 3283 if (!logkey_getlocalmat) { 3284 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3285 } 3286 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3287 if (scall == MAT_INITIAL_MATRIX){ 3288 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3289 ci[0] = 0; 3290 for (i=0; i<am; i++){ 3291 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3292 } 3293 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3294 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3295 k = 0; 3296 for (i=0; i<am; i++) { 3297 ncols_o = bi[i+1] - bi[i]; 3298 ncols_d = ai[i+1] - ai[i]; 3299 /* off-diagonal portion of A */ 3300 for (jo=0; jo<ncols_o; jo++) { 3301 col = cmap[*bj]; 3302 if (col >= cstart) break; 3303 cj[k] = col; bj++; 3304 ca[k++] = *ba++; 3305 } 3306 /* diagonal portion of A */ 3307 for (j=0; j<ncols_d; j++) { 3308 cj[k] = cstart + *aj++; 3309 ca[k++] = *aa++; 3310 } 3311 /* off-diagonal portion of A */ 3312 for (j=jo; j<ncols_o; j++) { 3313 cj[k] = cmap[*bj++]; 3314 ca[k++] = *ba++; 3315 } 3316 } 3317 /* put together the new matrix */ 3318 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3319 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3320 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3321 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3322 mat->freedata = PETSC_TRUE; 3323 mat->nonew = 0; 3324 } else if (scall == MAT_REUSE_MATRIX){ 3325 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3326 ci = mat->i; cj = mat->j; ca = mat->a; 3327 for (i=0; i<am; i++) { 3328 /* off-diagonal portion of A */ 3329 ncols_o = bi[i+1] - bi[i]; 3330 for (jo=0; jo<ncols_o; jo++) { 3331 col = cmap[*bj]; 3332 if (col >= cstart) break; 3333 *ca++ = *ba++; bj++; 3334 } 3335 /* diagonal portion of A */ 3336 ncols_d = ai[i+1] - ai[i]; 3337 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3338 /* off-diagonal portion of A */ 3339 for (j=jo; j<ncols_o; j++) { 3340 *ca++ = *ba++; bj++; 3341 } 3342 } 3343 } else { 3344 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3345 } 3346 3347 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3348 PetscFunctionReturn(0); 3349 } 3350 3351 static PetscEvent logkey_getlocalmatcondensed = 0; 3352 #undef __FUNCT__ 3353 #define __FUNCT__ "MatGetLocalMatCondensed" 3354 /*@C 3355 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3356 3357 Not Collective 3358 3359 Input Parameters: 3360 + A - the matrix 3361 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3362 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3363 3364 Output Parameter: 3365 . 
A_loc - the local sequential matrix generated 3366 3367 Level: developer 3368 3369 @*/ 3370 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3371 { 3372 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3373 PetscErrorCode ierr; 3374 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3375 IS isrowa,iscola; 3376 Mat *aloc; 3377 3378 PetscFunctionBegin; 3379 if (!logkey_getlocalmatcondensed) { 3380 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3381 } 3382 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3383 if (!row){ 3384 start = a->rstart; end = a->rend; 3385 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3386 } else { 3387 isrowa = *row; 3388 } 3389 if (!col){ 3390 start = a->cstart; 3391 cmap = a->garray; 3392 nzA = a->A->n; 3393 nzB = a->B->n; 3394 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3395 ncols = 0; 3396 for (i=0; i<nzB; i++) { 3397 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3398 else break; 3399 } 3400 imark = i; 3401 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3402 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3403 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3404 ierr = PetscFree(idx);CHKERRQ(ierr); 3405 } else { 3406 iscola = *col; 3407 } 3408 if (scall != MAT_INITIAL_MATRIX){ 3409 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3410 aloc[0] = *A_loc; 3411 } 3412 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3413 *A_loc = aloc[0]; 3414 ierr = PetscFree(aloc);CHKERRQ(ierr); 3415 if (!row){ 3416 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3417 } 3418 if (!col){ 3419 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3420 } 3421 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3422 PetscFunctionReturn(0); 3423 } 3424 3425 static PetscEvent logkey_GetBrowsOfAcols = 0; 3426 #undef __FUNCT__ 3427 #define __FUNCT__ "MatGetBrowsOfAcols" 3428 /*@C 3429 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3430 3431 Collective on Mat 3432 3433 Input Parameters: 3434 + A,B - the matrices in mpiaij format 3435 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3436 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3437 3438 Output Parameter: 3439 + rowb, colb - index sets of rows and columns of B to extract 3440 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3441 - B_seq - the sequential matrix generated 3442 3443 Level: developer 3444 3445 @*/ 3446 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3447 { 3448 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3449 PetscErrorCode ierr; 3450 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3451 IS isrowb,iscolb; 3452 Mat *bseq; 3453 3454 PetscFunctionBegin; 3455 if (a->cstart != b->rstart || a->cend != b->rend){ 3456 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3457 } 3458 if (!logkey_GetBrowsOfAcols) { 3459 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3460 } 3461 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3462 3463 if (scall == MAT_INITIAL_MATRIX){ 3464 start = a->cstart; 3465 cmap = a->garray; 3466 nzA = a->A->n; 3467 nzB = a->B->n; 3468 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3469 ncols = 0; 3470 for (i=0; i<nzB; i++) { /* row < local row index */ 3471 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3472 else break; 3473 } 3474 imark = i; 3475 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3476 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3477 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3478 ierr = PetscFree(idx);CHKERRQ(ierr); 3479 *brstart = imark; 3480 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3481 } else { 3482 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3483 isrowb = *rowb; iscolb = *colb; 3484 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3485 bseq[0] = *B_seq; 3486 } 3487 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3488 *B_seq = bseq[0]; 3489 ierr = PetscFree(bseq);CHKERRQ(ierr); 3490 if (!rowb){ 3491 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3492 } else { 3493 *rowb = isrowb; 3494 } 3495 if (!colb){ 3496 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3497 } else { 3498 *colb = iscolb; 3499 } 3500 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3501 PetscFunctionReturn(0); 3502 } 3503 3504 static PetscEvent logkey_GetBrowsOfAocols = 0; 3505 #undef __FUNCT__ 3506 #define __FUNCT__ "MatGetBrowsOfAoCols" 3507 /*@C 3508 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3509 of the OFF-DIAGONAL portion of local A 3510 3511 Collective on Mat 3512 3513 Input Parameters: 3514 + A,B - the matrices in mpiaij format 3515 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3516 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3517 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3518 3519 Output Parameter: 3520 + B_oth - the sequential matrix generated 3521 3522 Level: developer 3523 3524 @*/ 3525 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3526 { 3527 VecScatter_MPI_General *gen_to,*gen_from; 3528 PetscErrorCode ierr; 3529 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3530 Mat_SeqAIJ *b_oth; 3531 VecScatter ctx=a->Mvctx; 3532 MPI_Comm comm=ctx->comm; 3533 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3534 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3535 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3536 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3537 MPI_Request *rwaits,*swaits; 3538 MPI_Status *sstatus,rstatus; 3539 PetscInt *cols; 3540 PetscScalar *vals; 3541 PetscMPIInt j; 3542 3543 PetscFunctionBegin; 3544 if (a->cstart != b->rstart || a->cend != b->rend){ 3545 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3546 } 3547 if (!logkey_GetBrowsOfAocols) { 3548 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3549 } 3550 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3551 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3552 3553 gen_to = (VecScatter_MPI_General*)ctx->todata; 3554 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3555 rvalues = gen_from->values; /* holds the length of sending row */ 3556 svalues = gen_to->values; /* holds the length of receiving row */ 3557 nrecvs = gen_from->n; 3558 nsends = gen_to->n; 3559 rwaits = gen_from->requests; 3560 swaits = gen_to->requests; 3561 srow = gen_to->indices; /* local row index to be sent */ 3562 rstarts = gen_from->starts; 3563 sstarts = gen_to->starts; 3564 rprocs = gen_from->procs; 3565 sprocs = gen_to->procs; 3566 sstatus = gen_to->sstatus; 3567 3568 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3569 if (scall == MAT_INITIAL_MATRIX){ 3570 /* i-array */ 3571 /*---------*/ 3572 /* post receives */ 3573 for (i=0; i<nrecvs; i++){ 3574 rowlen = (PetscInt*)rvalues + rstarts[i]; 3575 nrows = rstarts[i+1]-rstarts[i]; 3576 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3577 } 3578 3579 /* pack the outgoing message */ 3580 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3581 rstartsj = sstartsj + nsends +1; 3582 sstartsj[0] = 0; rstartsj[0] = 0; 3583 len = 0; /* total length of j or a array to be sent */ 3584 k = 0; 3585 for (i=0; i<nsends; i++){ 3586 rowlen = (PetscInt*)svalues + sstarts[i]; 3587 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3588 for (j=0; j<nrows; j++) { 3589 row = srow[k] + b->rowners[rank]; /* global row idx */ 3590 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3591 len += rowlen[j]; 3592 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3593 k++; 3594 } 3595 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3596 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3597 } 3598 /* recvs and sends of i-array are completed */ 3599 i = nrecvs; 3600 while (i--) { 3601 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3602 } 3603 if (nsends) { 3604 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3605 } 3606 /* allocate buffers for sending j and a arrays */ 3607 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3608 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3609 3610 /* create i-array of B_oth */ 3611 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3612 b_othi[0] = 0; 3613 len = 0; /* total length of j or a array to be received */ 3614 k = 0; 3615 for (i=0; i<nrecvs; i++){ 3616 rowlen = (PetscInt*)rvalues + rstarts[i]; 3617 nrows = rstarts[i+1]-rstarts[i]; 3618 for (j=0; j<nrows; j++) { 3619 b_othi[k+1] = b_othi[k] + rowlen[j]; 3620 len += rowlen[j]; k++; 3621 } 3622 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3623 } 3624 3625 /* allocate space for j and a arrays of B_oth */ 3626 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3627 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3628 3629 /* j-array */ 3630 /*---------*/ 3631 /* post receives of j-array */ 3632 for (i=0; i<nrecvs; i++){ 3633 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3634 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3635 } 3636 k = 0; 3637 for (i=0; i<nsends; i++){ 3638 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3639 bufJ = bufj+sstartsj[i]; 3640 for (j=0; j<nrows; j++) { 3641 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3642 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3643 for (l=0; l<ncols; l++){ 3644 *bufJ++ = cols[l]; 3645 } 3646 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3647 } 3648 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3649 } 3650 3651 /* recvs and sends of j-array are completed */ 3652 i = nrecvs; 3653 while (i--) { 3654 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3655 } 3656 if (nsends) { 3657 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3658 } 3659 } else if (scall == MAT_REUSE_MATRIX){ 3660 sstartsj = *startsj; 3661 rstartsj = sstartsj + nsends +1; 3662 bufa = *bufa_ptr; 3663 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3664 b_otha = b_oth->a; 3665 } else { 3666 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3667 } 3668 3669 /* a-array */ 3670 /*---------*/ 3671 /* post receives of a-array */ 3672 for (i=0; i<nrecvs; i++){ 3673 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3674 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3675 } 3676 k = 0; 3677 for (i=0; i<nsends; i++){ 3678 nrows = sstarts[i+1]-sstarts[i]; 3679 bufA = bufa+sstartsj[i]; 3680 for (j=0; j<nrows; j++) { 3681 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3682 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3683 for (l=0; l<ncols; l++){ 3684 *bufA++ = vals[l]; 3685 } 3686 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3687 3688 } 3689 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3690 } 3691 /* recvs and sends of a-array are completed */ 3692 i = nrecvs; 3693 while (i--) { 3694 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3695 } 3696 if (nsends) { 
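/* the a-array sends must complete before bufa can be freed or handed back to the caller via bufa_ptr */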
3697 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3698 } 3699 3700 if (scall == MAT_INITIAL_MATRIX){ 3701 /* put together the new matrix */ 3702 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3703 3704 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3705 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3706 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3707 b_oth->freedata = PETSC_TRUE; 3708 b_oth->nonew = 0; 3709 3710 ierr = PetscFree(bufj);CHKERRQ(ierr); 3711 if (!startsj || !bufa_ptr){ 3712 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3713 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3714 } else { 3715 *startsj = sstartsj; 3716 *bufa_ptr = bufa; 3717 } 3718 } 3719 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3720 3721 PetscFunctionReturn(0); 3722 } 3723 3724 /*MC 3725 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3726 3727 Options Database Keys: 3728 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3729 3730 Level: beginner 3731 3732 .seealso: MatCreateMPIAIJ 3733 M*/ 3734 3735 EXTERN_C_BEGIN 3736 #undef __FUNCT__ 3737 #define __FUNCT__ "MatCreate_MPIAIJ" 3738 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3739 { 3740 Mat_MPIAIJ *b; 3741 PetscErrorCode ierr; 3742 PetscInt i; 3743 PetscMPIInt size; 3744 3745 PetscFunctionBegin; 3746 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3747 3748 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3749 B->data = (void*)b; 3750 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3751 B->factor = 0; 3752 B->bs = 1; 3753 B->assembled = PETSC_FALSE; 3754 B->mapping = 0; 3755 3756 B->insertmode = NOT_SET_VALUES; 3757 b->size = size; 3758 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3759 3760 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3761 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3762 3763 /* the information in the maps duplicates the information computed below, eventually 3764 we should remove the duplicate information that is not contained in the maps */ 3765 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3766 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3767 3768 /* build local table of row and column ownerships */ 3769 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3770 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3771 b->cowners = b->rowners + b->size + 2; 3772 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3773 b->rowners[0] = 0; 3774 for (i=2; i<=b->size; i++) { 3775 b->rowners[i] += b->rowners[i-1]; 3776 } 3777 b->rstart = b->rowners[b->rank]; 3778 b->rend = b->rowners[b->rank+1]; 3779 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3780 b->cowners[0] = 0; 3781 for (i=2; i<=b->size; i++) { 3782 b->cowners[i] += b->cowners[i-1]; 3783 } 3784 b->cstart = b->cowners[b->rank]; 3785 b->cend = b->cowners[b->rank+1]; 3786 3787 /* build cache for off array entries formed */ 3788 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3789 b->donotstash = PETSC_FALSE; 3790 b->colmap = 0; 3791 b->garray = 0; 3792 b->roworiented = PETSC_TRUE; 3793 3794 /* stuff used for matrix vector multiply */ 3795 b->lvec = PETSC_NULL; 3796 b->Mvctx = PETSC_NULL; 
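/* lvec and Mvctx are created later, during matrix assembly (MatSetUpMultiply_MPIAIJ) */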
3797 3798 /* stuff for MatGetRow() */ 3799 b->rowindices = 0; 3800 b->rowvalues = 0; 3801 b->getrowactive = PETSC_FALSE; 3802 3803 /* Explicitly create 2 MATSEQAIJ matrices. */ 3804 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3805 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3806 PetscLogObjectParent(B,b->A); 3807 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3808 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3809 PetscLogObjectParent(B,b->B); 3810 3811 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3812 "MatStoreValues_MPIAIJ", 3813 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3814 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3815 "MatRetrieveValues_MPIAIJ", 3816 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3817 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3818 "MatGetDiagonalBlock_MPIAIJ", 3819 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3820 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3821 "MatIsTranspose_MPIAIJ", 3822 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3823 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3824 "MatMPIAIJSetPreallocation_MPIAIJ", 3825 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3826 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3827 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3828 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3829 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3830 "MatDiagonalScaleLocal_MPIAIJ", 3831 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3832 PetscFunctionReturn(0); 3833 } 3834 EXTERN_C_END 3835
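/*
   Illustrative usage sketch (not part of the original source): it shows one way the
   MatMerge_SeqsToMPI() routines defined above can be driven.  Each process supplies a
   SeqAIJ matrix with the full global row dimension (rows it did not compute are simply
   empty), and the merge sums the per-process contributions into one parallel MPIAIJ
   matrix.  The function and variable names below (ExampleMergeUsage, Aseq, C) are
   invented for this example only; the block is compiled out with #if 0.
*/
#if 0
static PetscErrorCode ExampleMergeUsage(MPI_Comm comm,Mat Aseq)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  /* first call: symbolic + numeric merge; the Mat_Merge_SeqsToMPI data is attached to C */
  ierr = MatMerge_SeqsToMPI(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
  /* if Aseq later receives new numerical values with the same nonzero pattern,
     only the numeric phase needs to be repeated */
  ierr = MatMerge_SeqsToMPINumeric(Aseq,C);CHKERRQ(ierr);
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif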