1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 58 if (nonew == 1) goto a_noinsert; \ 59 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 60 if (nrow >= rmax) { \ 61 /* there is no extra room in row, therefore enlarge */ \ 62 PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 63 PetscScalar *new_a; \ 64 \ 65 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 66 \ 67 /* malloc new storage space */ \ 68 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \ 69 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 70 new_j = (PetscInt*)(new_a + new_nz); \ 71 new_i = new_j + new_nz; \ 72 \ 73 /* copy over old data into new slots */ \ 74 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 75 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 76 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 77 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 78 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 79 len*sizeof(PetscInt));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 81 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 82 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 83 /* free up old matrix storage */ \ 84 \ 85 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 86 if (!a->singlemalloc) { \ 87 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 88 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 89 } \ 90 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 91 a->singlemalloc = PETSC_TRUE; \ 92 \ 93 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 94 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 95 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 96 a->maxnz += CHUNKSIZE; \ 97 a->reallocs++; \ 98 } \ 99 N = nrow++ - 1; a->nz++; \ 100 /* shift up all the later entries in this row */ \ 101 for (ii=N; ii>=_i; ii--) { \ 102 rp[ii+1] = rp[ii]; \ 103 ap[ii+1] = ap[ii]; \ 104 } \ 105 rp[_i] = col1; \ 106 ap[_i] = value; \ 107 a_noinsert: ; \ 108 ailen[row] = nrow; \ 109 } 110 111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 112 { \ 113 \ 114 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 115 rmax = bimax[row]; nrow = bilen[row]; \ 116 col1 = col - shift; \ 117 \ 118 low = 0; high = nrow; \ 119 while (high-low > 5) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > col) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > col1) break; \ 126 if (rp[_i] == col1) { \ 127 if (addv == ADD_VALUES) ap[_i] += value; \ 128 else ap[_i] = value; \ 129 goto b_noinsert; \ 130 } \ 131 } \ 132 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 133 if (nonew == 1) goto b_noinsert; \ 134 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 if (nrow >= rmax) { \ 136 /* there is no extra room in row, therefore enlarge */ \ 137 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 138 PetscScalar *new_a; \ 139 \ 140 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 141 \ 142 /* malloc new storage space */ \ 143 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 144 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 145 new_j = (PetscInt*)(new_a + new_nz); \ 146 new_i = new_j + new_nz; \ 147 \ 148 /* copy over old data into new slots */ \ 149 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 150 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 151 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 152 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 153 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 154 len*sizeof(PetscInt));CHKERRQ(ierr); \ 155 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 157 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 158 /* free up old matrix storage */ \ 159 \ 160 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 161 if (!b->singlemalloc) { \ 162 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 163 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 164 } \ 165 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 166 b->singlemalloc = PETSC_TRUE; \ 167 \ 168 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 169 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 170 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 171 b->maxnz += CHUNKSIZE; \ 172 b->reallocs++; \ 173 } \ 174 N = nrow++ - 1; b->nz++; \ 175 /* shift up all the later entries in this row */ \ 176 for (ii=N; ii>=_i; ii--) { \ 177 rp[ii+1] = rp[ii]; \ 178 ap[ii+1] = ap[ii]; \ 179 } \ 180 rp[_i] = col1; \ 181 ap[_i] = value; \ 182 b_noinsert: ; \ 183 bilen[row] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIAIJ" 188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 191 PetscScalar value; 192 PetscErrorCode ierr; 193 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 194 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 195 PetscTruth roworiented = aij->roworiented; 196 197 /* Some Variables required in the macro */ 198 Mat A = aij->A; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 200 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 201 PetscScalar *aa = a->a; 202 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 203 Mat B = aij->B; 204 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 205 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 206 PetscScalar *ba = b->a; 207 208 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 209 PetscInt nonew = a->nonew,shift=0; 210 PetscScalar *ap; 211 212 PetscFunctionBegin; 213 for (i=0; i<m; i++) { 214 if (im[i] < 0) continue; 215 #if defined(PETSC_USE_DEBUG) 216 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 217 #endif 218 if (im[i] >= rstart && im[i] < rend) { 219 row = im[i] - rstart; 220 for (j=0; j<n; j++) { 221 if (in[j] >= cstart && in[j] < cend){ 222 col = in[j] - cstart; 223 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 224 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 225 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 226 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 227 } else if (in[j] < 0) continue; 228 #if defined(PETSC_USE_DEBUG) 229 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 230 #endif 231 else { 232 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 234 if (mat->was_assembled) { 235 if (!aij->colmap) { 236 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 240 col--; 241 #else 242 col = aij->colmap[in[j]] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 245 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 248 B = aij->B; 249 b = (Mat_SeqAIJ*)B->data; 250 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 251 ba = b->a; 252 } 253 } else col = in[j]; 254 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (!aij->donotstash) { 260 if (roworiented) { 261 if (ignorezeroentries && v[i*n] == 0.0) continue; 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 263 } else { 264 if (ignorezeroentries && v[i] == 0.0) continue; 265 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatGetValues_MPIAIJ" 275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 280 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 281 282 PetscFunctionBegin; 283 for (i=0; i<m; i++) { 284 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 285 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 286 if (idxm[i] >= rstart && idxm[i] < rend) { 287 row = idxm[i] - rstart; 288 for (j=0; j<n; j++) { 289 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 290 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 291 if (idxn[j] >= cstart && idxn[j] < cend){ 292 col = idxn[j] - cstart; 293 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 294 } else { 295 if (!aij->colmap) { 296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 297 } 298 #if defined (PETSC_USE_CTABLE) 299 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 300 col --; 301 #else 302 col = aij->colmap[idxn[j]] - 1; 303 #endif 304 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 305 else { 306 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 307 } 308 } 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 320 { 321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 322 PetscErrorCode ierr; 323 PetscInt nstash,reallocs; 324 InsertMode addv; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 PetscFunctionReturn(0); 329 } 330 331 /* make sure all processors are either in INSERTMODE or ADDMODE */ 332 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 333 if (addv == (ADD_VALUES|INSERT_VALUES)) { 334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 335 } 336 mat->insertmode = addv; /* in case this processor had no cache */ 337 338 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 339 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 340 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 347 { 348 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 349 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 350 PetscErrorCode ierr; 351 PetscMPIInt n; 352 PetscInt i,j,rstart,ncols,flg; 353 PetscInt *row,*col,other_disassembled; 354 PetscScalar *val; 355 InsertMode addv = mat->insertmode; 356 357 PetscFunctionBegin; 358 if (!aij->donotstash) { 359 while (1) { 360 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 361 if (!flg) break; 362 363 for (i=0; i<n;) { 364 /* Now identify the consecutive vals belonging to the same row */ 365 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 366 if (j < n) ncols = j-i; 367 else ncols = n-i; 368 /* Now assemble all these values with a single function call */ 369 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 370 i = j; 371 } 372 } 373 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 374 } 375 a->compressedrow.use = PETSC_FALSE; 376 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 377 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 378 379 /* determine if any processor has disassembled, if so we must 380 also disassemble ourselfs, in order that we may reassemble. */ 381 /* 382 if nonzero structure of submatrix B cannot change then we know that 383 no processor disassembled thus we can skip this stuff 384 */ 385 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 386 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 387 if (mat->was_assembled && !other_disassembled) { 388 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 389 /* reaccess the b because aij->B was changed */ 390 b = (Mat_SeqAIJ *)aij->B->data; 391 } 392 } 393 394 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 395 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 396 } 397 398 b->inode.use = PETSC_FALSE; 399 b->compressedrow.use = PETSC_TRUE; 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNCT__ 416 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 417 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 418 { 419 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 420 PetscErrorCode ierr; 421 422 PetscFunctionBegin; 423 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 424 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatZeroRows_MPIAIJ" 430 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 431 { 432 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 433 PetscErrorCode ierr; 434 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 435 PetscInt i,N,*rows,*owners = l->rowners; 436 PetscInt *nprocs,j,idx,nsends,row; 437 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 438 PetscInt *rvalues,count,base,slen,*source; 439 PetscInt *lens,*lrows,*values,rstart=l->rstart; 440 MPI_Comm comm = A->comm; 441 MPI_Request *send_waits,*recv_waits; 442 MPI_Status recv_status,*send_status; 443 IS istmp; 444 PetscTruth found; 445 446 PetscFunctionBegin; 447 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 448 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 449 450 /* first count number of contributors to each processor */ 451 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 452 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 453 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 454 for (i=0; i<N; i++) { 455 idx = rows[i]; 456 found = PETSC_FALSE; 457 for (j=0; j<size; j++) { 458 if (idx >= owners[j] && idx < owners[j+1]) { 459 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 460 } 461 } 462 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 463 } 464 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 465 466 /* inform other processors of number of messages and max length*/ 467 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 468 469 /* post receives: */ 470 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 471 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 472 for (i=0; i<nrecvs; i++) { 473 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 474 } 475 476 /* do sends: 477 1) starts[i] gives the starting index in svalues for stuff going to 478 the ith processor 479 */ 480 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 481 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 482 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 483 starts[0] = 0; 484 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 485 for (i=0; i<N; i++) { 486 svalues[starts[owner[i]]++] = rows[i]; 487 } 488 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 489 490 starts[0] = 0; 491 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 492 count = 0; 493 for (i=0; i<size; i++) { 494 if (nprocs[2*i+1]) { 495 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 496 } 497 } 498 ierr = PetscFree(starts);CHKERRQ(ierr); 499 500 base = owners[rank]; 501 502 /* wait on receives */ 503 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 504 source = lens + nrecvs; 505 count = nrecvs; slen = 0; 506 while (count) { 507 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 508 /* unpack receives into our local space */ 509 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 510 source[imdex] = recv_status.MPI_SOURCE; 511 lens[imdex] = n; 512 slen += n; 513 count--; 514 } 515 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 516 517 /* move the data into the send scatter */ 518 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 519 count = 0; 520 for (i=0; i<nrecvs; i++) { 521 values = rvalues + i*nmax; 522 for (j=0; j<lens[i]; j++) { 523 lrows[count++] = values[j] - base; 524 } 525 } 526 ierr = PetscFree(rvalues);CHKERRQ(ierr); 527 ierr = PetscFree(lens);CHKERRQ(ierr); 528 ierr = PetscFree(owner);CHKERRQ(ierr); 529 ierr = PetscFree(nprocs);CHKERRQ(ierr); 530 531 /* actually zap the local rows */ 532 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 533 PetscLogObjectParent(A,istmp); 534 535 /* 536 Zero the required rows. If the "diagonal block" of the matrix 537 is square and the user wishes to set the diagonal we use seperate 538 code so that MatSetValues() is not called for each diagonal allocating 539 new memory, thus calling lots of mallocs and slowing things down. 540 541 Contributed by: Mathew Knepley 542 */ 543 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 544 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 545 if (diag && (l->A->M == l->A->N)) { 546 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 547 } else if (diag) { 548 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 549 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 550 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 552 } 553 for (i = 0; i < slen; i++) { 554 row = lrows[i] + rstart; 555 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 556 } 557 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 } else { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 } 562 ierr = ISDestroy(istmp);CHKERRQ(ierr); 563 ierr = PetscFree(lrows);CHKERRQ(ierr); 564 565 /* wait on sends */ 566 if (nsends) { 567 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 568 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 569 ierr = PetscFree(send_status);CHKERRQ(ierr); 570 } 571 ierr = PetscFree(send_waits);CHKERRQ(ierr); 572 ierr = PetscFree(svalues);CHKERRQ(ierr); 573 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatMult_MPIAIJ" 579 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 580 { 581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 582 PetscErrorCode ierr; 583 PetscInt nt; 584 585 PetscFunctionBegin; 586 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 587 if (nt != A->n) { 588 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 589 } 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultAdd_MPIAIJ" 599 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 PetscErrorCode ierr; 603 604 PetscFunctionBegin; 605 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 606 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 607 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 608 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 614 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 615 { 616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 617 PetscErrorCode ierr; 618 PetscTruth merged; 619 620 PetscFunctionBegin; 621 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 622 /* do nondiagonal part */ 623 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 624 if (!merged) { 625 /* send it on its way */ 626 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 627 /* do local part */ 628 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 629 /* receive remote parts: note this assumes the values are not actually */ 630 /* added in yy until the next line, */ 631 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 632 } else { 633 /* do local part */ 634 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 635 /* send it on its way */ 636 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 /* values actually were received in the Begin() but we need to call this nop */ 638 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 639 } 640 PetscFunctionReturn(0); 641 } 642 643 EXTERN_C_BEGIN 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 646 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 647 { 648 MPI_Comm comm; 649 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 650 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 651 IS Me,Notme; 652 PetscErrorCode ierr; 653 PetscInt M,N,first,last,*notme,i; 654 PetscMPIInt size; 655 656 PetscFunctionBegin; 657 658 /* Easy test: symmetric diagonal block */ 659 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 660 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 661 if (!*f) PetscFunctionReturn(0); 662 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 663 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 664 if (size == 1) PetscFunctionReturn(0); 665 666 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 667 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 668 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 669 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 670 for (i=0; i<first; i++) notme[i] = i; 671 for (i=last; i<M; i++) notme[i-last+first] = i; 672 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 673 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 674 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 675 Aoff = Aoffs[0]; 676 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 677 Boff = Boffs[0]; 678 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 679 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 680 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 681 ierr = ISDestroy(Me);CHKERRQ(ierr); 682 ierr = ISDestroy(Notme);CHKERRQ(ierr); 683 684 PetscFunctionReturn(0); 685 } 686 EXTERN_C_END 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 690 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 691 { 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 PetscErrorCode ierr; 694 695 PetscFunctionBegin; 696 /* do nondiagonal part */ 697 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 698 /* send it on its way */ 699 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 700 /* do local part */ 701 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 702 /* receive remote parts */ 703 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 /* 708 This only works correctly for square matrices where the subblock A->A is the 709 diagonal block 710 */ 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 713 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 714 { 715 PetscErrorCode ierr; 716 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 717 718 PetscFunctionBegin; 719 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 720 if (a->rstart != a->cstart || a->rend != a->cend) { 721 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 722 } 723 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatScale_MPIAIJ" 729 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 730 { 731 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 736 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatDestroy_MPIAIJ" 742 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 #if defined(PETSC_USE_LOG) 749 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 750 #endif 751 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 752 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 753 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 754 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 755 #if defined (PETSC_USE_CTABLE) 756 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 757 #else 758 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 759 #endif 760 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 761 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 762 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 763 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 764 ierr = PetscFree(aij);CHKERRQ(ierr); 765 766 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 767 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 768 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 769 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 770 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 771 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 772 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 773 PetscFunctionReturn(0); 774 } 775 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatView_MPIAIJ_Binary" 778 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 779 { 780 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 781 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 782 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 783 PetscErrorCode ierr; 784 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 785 int fd; 786 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 787 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 788 PetscScalar *column_values; 789 790 PetscFunctionBegin; 791 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 792 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 793 nz = A->nz + B->nz; 794 if (!rank) { 795 header[0] = MAT_FILE_COOKIE; 796 header[1] = mat->M; 797 header[2] = mat->N; 798 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 799 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 800 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 801 /* get largest number of rows any processor has */ 802 rlen = mat->m; 803 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 804 for (i=1; i<size; i++) { 805 rlen = PetscMax(rlen,range[i+1] - range[i]); 806 } 807 } else { 808 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 809 rlen = mat->m; 810 } 811 812 /* load up the local row counts */ 813 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 814 for (i=0; i<mat->m; i++) { 815 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 816 } 817 818 /* store the row lengths to the file */ 819 if (!rank) { 820 MPI_Status status; 821 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 822 for (i=1; i<size; i++) { 823 rlen = range[i+1] - range[i]; 824 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 825 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 826 } 827 } else { 828 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 831 832 /* load up the local column indices */ 833 nzmax = nz; /* )th processor needs space a largest processor needs */ 834 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 835 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 836 cnt = 0; 837 for (i=0; i<mat->m; i++) { 838 for (j=B->i[i]; j<B->i[i+1]; j++) { 839 if ( (col = garray[B->j[j]]) > cstart) break; 840 column_indices[cnt++] = col; 841 } 842 for (k=A->i[i]; k<A->i[i+1]; k++) { 843 column_indices[cnt++] = A->j[k] + cstart; 844 } 845 for (; j<B->i[i+1]; j++) { 846 column_indices[cnt++] = garray[B->j[j]]; 847 } 848 } 849 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 850 851 /* store the column indices to the file */ 852 if (!rank) { 853 MPI_Status status; 854 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 855 for (i=1; i<size; i++) { 856 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 858 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 859 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 860 } 861 } else { 862 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 863 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(column_indices);CHKERRQ(ierr); 866 867 /* load up the local column values */ 868 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 869 cnt = 0; 870 for (i=0; i<mat->m; i++) { 871 for (j=B->i[i]; j<B->i[i+1]; j++) { 872 if ( garray[B->j[j]] > cstart) break; 873 column_values[cnt++] = B->a[j]; 874 } 875 for (k=A->i[i]; k<A->i[i+1]; k++) { 876 column_values[cnt++] = A->a[k]; 877 } 878 for (; j<B->i[i+1]; j++) { 879 column_values[cnt++] = B->a[j]; 880 } 881 } 882 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 883 884 /* store the column values to the file */ 885 if (!rank) { 886 MPI_Status status; 887 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 888 for (i=1; i<size; i++) { 889 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 890 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 891 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 892 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 893 } 894 } else { 895 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 896 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 897 } 898 ierr = PetscFree(column_values);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 904 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 905 { 906 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 907 PetscErrorCode ierr; 908 PetscMPIInt rank = aij->rank,size = aij->size; 909 PetscTruth isdraw,iascii,isbinary; 910 PetscViewer sviewer; 911 PetscViewerFormat format; 912 913 PetscFunctionBegin; 914 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 915 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 916 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 917 if (iascii) { 918 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 919 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 920 MatInfo info; 921 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 922 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 923 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 924 if (!a->inode.size) { 925 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 926 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 927 } else { 928 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 929 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 930 } 931 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 932 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 933 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 934 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 935 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 936 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 937 PetscFunctionReturn(0); 938 } else if (format == PETSC_VIEWER_ASCII_INFO) { 939 Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data; 940 if (a->inode.size) { 941 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr); 942 } else { 943 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 944 } 945 PetscFunctionReturn(0); 946 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 947 PetscFunctionReturn(0); 948 } 949 } else if (isbinary) { 950 if (size == 1) { 951 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 952 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 953 } else { 954 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 955 } 956 PetscFunctionReturn(0); 957 } else if (isdraw) { 958 PetscDraw draw; 959 PetscTruth isnull; 960 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 961 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 962 } 963 964 if (size == 1) { 965 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 966 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 967 } else { 968 /* assemble the entire matrix onto first processor. */ 969 Mat A; 970 Mat_SeqAIJ *Aloc; 971 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 972 PetscScalar *a; 973 974 if (!rank) { 975 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 976 } else { 977 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 978 } 979 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 980 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 981 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 982 PetscLogObjectParent(mat,A); 983 984 /* copy over the A part */ 985 Aloc = (Mat_SeqAIJ*)aij->A->data; 986 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 987 row = aij->rstart; 988 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 989 for (i=0; i<m; i++) { 990 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 991 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 992 } 993 aj = Aloc->j; 994 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 995 996 /* copy over the B part */ 997 Aloc = (Mat_SeqAIJ*)aij->B->data; 998 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 999 row = aij->rstart; 1000 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1001 ct = cols; 1002 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1003 for (i=0; i<m; i++) { 1004 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1005 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1006 } 1007 ierr = PetscFree(ct);CHKERRQ(ierr); 1008 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1009 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1010 /* 1011 Everyone has to call to draw the matrix since the graphics waits are 1012 synchronized across all processors that share the PetscDraw object 1013 */ 1014 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1015 if (!rank) { 1016 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1017 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1018 } 1019 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1020 ierr = MatDestroy(A);CHKERRQ(ierr); 1021 } 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatView_MPIAIJ" 1027 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1028 { 1029 PetscErrorCode ierr; 1030 PetscTruth iascii,isdraw,issocket,isbinary; 1031 1032 PetscFunctionBegin; 1033 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1034 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1035 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1036 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1037 if (iascii || isdraw || isbinary || issocket) { 1038 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1039 } else { 1040 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1041 } 1042 PetscFunctionReturn(0); 1043 } 1044 1045 1046 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "MatRelax_MPIAIJ" 1049 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1050 { 1051 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1052 PetscErrorCode ierr; 1053 Vec bb1; 1054 PetscScalar mone=-1.0; 1055 1056 PetscFunctionBegin; 1057 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1058 1059 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1060 1061 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1062 if (flag & SOR_ZERO_INITIAL_GUESS) { 1063 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1064 its--; 1065 } 1066 1067 while (its--) { 1068 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1080 if (flag & SOR_ZERO_INITIAL_GUESS) { 1081 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1082 its--; 1083 } 1084 while (its--) { 1085 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1087 1088 /* update rhs: bb1 = bb - B*x */ 1089 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1090 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1091 1092 /* local sweep */ 1093 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1094 CHKERRQ(ierr); 1095 } 1096 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1097 if (flag & SOR_ZERO_INITIAL_GUESS) { 1098 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1099 its--; 1100 } 1101 while (its--) { 1102 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1103 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1104 1105 /* update rhs: bb1 = bb - B*x */ 1106 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1107 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1108 1109 /* local sweep */ 1110 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1111 CHKERRQ(ierr); 1112 } 1113 } else { 1114 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1115 } 1116 1117 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1123 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1124 { 1125 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1126 Mat A = mat->A,B = mat->B; 1127 PetscErrorCode ierr; 1128 PetscReal isend[5],irecv[5]; 1129 1130 PetscFunctionBegin; 1131 info->block_size = 1.0; 1132 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1133 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1134 isend[3] = info->memory; isend[4] = info->mallocs; 1135 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1136 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1137 isend[3] += info->memory; isend[4] += info->mallocs; 1138 if (flag == MAT_LOCAL) { 1139 info->nz_used = isend[0]; 1140 info->nz_allocated = isend[1]; 1141 info->nz_unneeded = isend[2]; 1142 info->memory = isend[3]; 1143 info->mallocs = isend[4]; 1144 } else if (flag == MAT_GLOBAL_MAX) { 1145 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1146 info->nz_used = irecv[0]; 1147 info->nz_allocated = irecv[1]; 1148 info->nz_unneeded = irecv[2]; 1149 info->memory = irecv[3]; 1150 info->mallocs = irecv[4]; 1151 } else if (flag == MAT_GLOBAL_SUM) { 1152 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1153 info->nz_used = irecv[0]; 1154 info->nz_allocated = irecv[1]; 1155 info->nz_unneeded = irecv[2]; 1156 info->memory = irecv[3]; 1157 info->mallocs = irecv[4]; 1158 } 1159 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1160 info->fill_ratio_needed = 0; 1161 info->factor_mallocs = 0; 1162 info->rows_global = (double)matin->M; 1163 info->columns_global = (double)matin->N; 1164 info->rows_local = (double)matin->m; 1165 info->columns_local = (double)matin->N; 1166 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatSetOption_MPIAIJ" 1172 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1173 { 1174 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 switch (op) { 1179 case MAT_NO_NEW_NONZERO_LOCATIONS: 1180 case MAT_YES_NEW_NONZERO_LOCATIONS: 1181 case MAT_COLUMNS_UNSORTED: 1182 case MAT_COLUMNS_SORTED: 1183 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1184 case MAT_KEEP_ZEROED_ROWS: 1185 case MAT_NEW_NONZERO_LOCATION_ERR: 1186 case MAT_USE_INODES: 1187 case MAT_DO_NOT_USE_INODES: 1188 case MAT_IGNORE_ZERO_ENTRIES: 1189 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1190 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1191 break; 1192 case MAT_ROW_ORIENTED: 1193 a->roworiented = PETSC_TRUE; 1194 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1195 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1196 break; 1197 case MAT_ROWS_SORTED: 1198 case MAT_ROWS_UNSORTED: 1199 case MAT_YES_NEW_DIAGONALS: 1200 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1201 break; 1202 case MAT_COLUMN_ORIENTED: 1203 a->roworiented = PETSC_FALSE; 1204 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1205 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1206 break; 1207 case MAT_IGNORE_OFF_PROC_ENTRIES: 1208 a->donotstash = PETSC_TRUE; 1209 break; 1210 case MAT_NO_NEW_DIAGONALS: 1211 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1212 case MAT_SYMMETRIC: 1213 case MAT_STRUCTURALLY_SYMMETRIC: 1214 case MAT_HERMITIAN: 1215 case MAT_SYMMETRY_ETERNAL: 1216 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1217 break; 1218 case MAT_NOT_SYMMETRIC: 1219 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1220 case MAT_NOT_HERMITIAN: 1221 case MAT_NOT_SYMMETRY_ETERNAL: 1222 break; 1223 default: 1224 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatGetRow_MPIAIJ" 1231 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1232 { 1233 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1234 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1235 PetscErrorCode ierr; 1236 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1237 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1238 PetscInt *cmap,*idx_p; 1239 1240 PetscFunctionBegin; 1241 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1242 mat->getrowactive = PETSC_TRUE; 1243 1244 if (!mat->rowvalues && (idx || v)) { 1245 /* 1246 allocate enough space to hold information from the longest row. 1247 */ 1248 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1249 PetscInt max = 1,tmp; 1250 for (i=0; i<matin->m; i++) { 1251 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1252 if (max < tmp) { max = tmp; } 1253 } 1254 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1255 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1256 } 1257 1258 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1259 lrow = row - rstart; 1260 1261 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1262 if (!v) {pvA = 0; pvB = 0;} 1263 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1264 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1265 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1266 nztot = nzA + nzB; 1267 1268 cmap = mat->garray; 1269 if (v || idx) { 1270 if (nztot) { 1271 /* Sort by increasing column numbers, assuming A and B already sorted */ 1272 PetscInt imark = -1; 1273 if (v) { 1274 *v = v_p = mat->rowvalues; 1275 for (i=0; i<nzB; i++) { 1276 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1277 else break; 1278 } 1279 imark = i; 1280 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1281 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1282 } 1283 if (idx) { 1284 *idx = idx_p = mat->rowindices; 1285 if (imark > -1) { 1286 for (i=0; i<imark; i++) { 1287 idx_p[i] = cmap[cworkB[i]]; 1288 } 1289 } else { 1290 for (i=0; i<nzB; i++) { 1291 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1292 else break; 1293 } 1294 imark = i; 1295 } 1296 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1297 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1298 } 1299 } else { 1300 if (idx) *idx = 0; 1301 if (v) *v = 0; 1302 } 1303 } 1304 *nz = nztot; 1305 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1306 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1312 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1313 { 1314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1315 1316 PetscFunctionBegin; 1317 if (!aij->getrowactive) { 1318 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1319 } 1320 aij->getrowactive = PETSC_FALSE; 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatNorm_MPIAIJ" 1326 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1327 { 1328 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1329 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1330 PetscErrorCode ierr; 1331 PetscInt i,j,cstart = aij->cstart; 1332 PetscReal sum = 0.0; 1333 PetscScalar *v; 1334 1335 PetscFunctionBegin; 1336 if (aij->size == 1) { 1337 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1338 } else { 1339 if (type == NORM_FROBENIUS) { 1340 v = amat->a; 1341 for (i=0; i<amat->nz; i++) { 1342 #if defined(PETSC_USE_COMPLEX) 1343 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1344 #else 1345 sum += (*v)*(*v); v++; 1346 #endif 1347 } 1348 v = bmat->a; 1349 for (i=0; i<bmat->nz; i++) { 1350 #if defined(PETSC_USE_COMPLEX) 1351 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1352 #else 1353 sum += (*v)*(*v); v++; 1354 #endif 1355 } 1356 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1357 *norm = sqrt(*norm); 1358 } else if (type == NORM_1) { /* max column norm */ 1359 PetscReal *tmp,*tmp2; 1360 PetscInt *jj,*garray = aij->garray; 1361 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1362 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1363 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1364 *norm = 0.0; 1365 v = amat->a; jj = amat->j; 1366 for (j=0; j<amat->nz; j++) { 1367 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1368 } 1369 v = bmat->a; jj = bmat->j; 1370 for (j=0; j<bmat->nz; j++) { 1371 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1372 } 1373 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1374 for (j=0; j<mat->N; j++) { 1375 if (tmp2[j] > *norm) *norm = tmp2[j]; 1376 } 1377 ierr = PetscFree(tmp);CHKERRQ(ierr); 1378 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1379 } else if (type == NORM_INFINITY) { /* max row norm */ 1380 PetscReal ntemp = 0.0; 1381 for (j=0; j<aij->A->m; j++) { 1382 v = amat->a + amat->i[j]; 1383 sum = 0.0; 1384 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1385 sum += PetscAbsScalar(*v); v++; 1386 } 1387 v = bmat->a + bmat->i[j]; 1388 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1389 sum += PetscAbsScalar(*v); v++; 1390 } 1391 if (sum > ntemp) ntemp = sum; 1392 } 1393 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1394 } else { 1395 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1396 } 1397 } 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatTranspose_MPIAIJ" 1403 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1404 { 1405 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1406 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1407 PetscErrorCode ierr; 1408 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1409 Mat B; 1410 PetscScalar *array; 1411 1412 PetscFunctionBegin; 1413 if (!matout && M != N) { 1414 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1415 } 1416 1417 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1418 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1419 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1420 1421 /* copy over the A part */ 1422 Aloc = (Mat_SeqAIJ*)a->A->data; 1423 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1424 row = a->rstart; 1425 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1426 for (i=0; i<m; i++) { 1427 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1428 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1429 } 1430 aj = Aloc->j; 1431 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1432 1433 /* copy over the B part */ 1434 Aloc = (Mat_SeqAIJ*)a->B->data; 1435 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1436 row = a->rstart; 1437 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1438 ct = cols; 1439 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1440 for (i=0; i<m; i++) { 1441 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1442 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1443 } 1444 ierr = PetscFree(ct);CHKERRQ(ierr); 1445 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 if (matout) { 1448 *matout = B; 1449 } else { 1450 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1457 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1458 { 1459 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1460 Mat a = aij->A,b = aij->B; 1461 PetscErrorCode ierr; 1462 PetscInt s1,s2,s3; 1463 1464 PetscFunctionBegin; 1465 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1466 if (rr) { 1467 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1468 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1469 /* Overlap communication with computation. */ 1470 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1471 } 1472 if (ll) { 1473 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1474 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1475 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1476 } 1477 /* scale the diagonal block */ 1478 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1479 1480 if (rr) { 1481 /* Do a scatter end and then right scale the off-diagonal block */ 1482 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1483 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1484 } 1485 1486 PetscFunctionReturn(0); 1487 } 1488 1489 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1492 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1493 { 1494 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1495 PetscErrorCode ierr; 1496 1497 PetscFunctionBegin; 1498 if (!a->rank) { 1499 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1506 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1507 { 1508 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1509 PetscErrorCode ierr; 1510 1511 PetscFunctionBegin; 1512 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1513 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1518 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1519 { 1520 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1521 PetscErrorCode ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatEqual_MPIAIJ" 1530 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1531 { 1532 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1533 Mat a,b,c,d; 1534 PetscTruth flg; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 a = matA->A; b = matA->B; 1539 c = matB->A; d = matB->B; 1540 1541 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1542 if (flg) { 1543 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1544 } 1545 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatCopy_MPIAIJ" 1551 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1552 { 1553 PetscErrorCode ierr; 1554 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1555 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1556 1557 PetscFunctionBegin; 1558 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1559 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1560 /* because of the column compression in the off-processor part of the matrix a->B, 1561 the number of columns in a->B and b->B may be different, hence we cannot call 1562 the MatCopy() directly on the two parts. If need be, we can provide a more 1563 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1564 then copying the submatrices */ 1565 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1566 } else { 1567 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1568 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1569 } 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1575 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1576 { 1577 PetscErrorCode ierr; 1578 1579 PetscFunctionBegin; 1580 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #include "petscblaslapack.h" 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatAXPY_MPIAIJ" 1587 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1588 { 1589 PetscErrorCode ierr; 1590 PetscInt i; 1591 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1592 PetscBLASInt bnz,one=1; 1593 Mat_SeqAIJ *x,*y; 1594 1595 PetscFunctionBegin; 1596 if (str == SAME_NONZERO_PATTERN) { 1597 x = (Mat_SeqAIJ *)xx->A->data; 1598 y = (Mat_SeqAIJ *)yy->A->data; 1599 bnz = (PetscBLASInt)x->nz; 1600 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1601 x = (Mat_SeqAIJ *)xx->B->data; 1602 y = (Mat_SeqAIJ *)yy->B->data; 1603 bnz = (PetscBLASInt)x->nz; 1604 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1605 } else if (str == SUBSET_NONZERO_PATTERN) { 1606 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1607 1608 x = (Mat_SeqAIJ *)xx->B->data; 1609 y = (Mat_SeqAIJ *)yy->B->data; 1610 if (y->xtoy && y->XtoY != xx->B) { 1611 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1612 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1613 } 1614 if (!y->xtoy) { /* get xtoy */ 1615 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1616 y->XtoY = xx->B; 1617 } 1618 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1619 } else { 1620 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 /* -------------------------------------------------------------------*/ 1626 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1627 MatGetRow_MPIAIJ, 1628 MatRestoreRow_MPIAIJ, 1629 MatMult_MPIAIJ, 1630 /* 4*/ MatMultAdd_MPIAIJ, 1631 MatMultTranspose_MPIAIJ, 1632 MatMultTransposeAdd_MPIAIJ, 1633 0, 1634 0, 1635 0, 1636 /*10*/ 0, 1637 0, 1638 0, 1639 MatRelax_MPIAIJ, 1640 MatTranspose_MPIAIJ, 1641 /*15*/ MatGetInfo_MPIAIJ, 1642 MatEqual_MPIAIJ, 1643 MatGetDiagonal_MPIAIJ, 1644 MatDiagonalScale_MPIAIJ, 1645 MatNorm_MPIAIJ, 1646 /*20*/ MatAssemblyBegin_MPIAIJ, 1647 MatAssemblyEnd_MPIAIJ, 1648 0, 1649 MatSetOption_MPIAIJ, 1650 MatZeroEntries_MPIAIJ, 1651 /*25*/ MatZeroRows_MPIAIJ, 1652 0, 1653 0, 1654 0, 1655 0, 1656 /*30*/ MatSetUpPreallocation_MPIAIJ, 1657 0, 1658 0, 1659 0, 1660 0, 1661 /*35*/ MatDuplicate_MPIAIJ, 1662 0, 1663 0, 1664 0, 1665 0, 1666 /*40*/ MatAXPY_MPIAIJ, 1667 MatGetSubMatrices_MPIAIJ, 1668 MatIncreaseOverlap_MPIAIJ, 1669 MatGetValues_MPIAIJ, 1670 MatCopy_MPIAIJ, 1671 /*45*/ MatPrintHelp_MPIAIJ, 1672 MatScale_MPIAIJ, 1673 0, 1674 0, 1675 0, 1676 /*50*/ MatSetBlockSize_MPIAIJ, 1677 0, 1678 0, 1679 0, 1680 0, 1681 /*55*/ MatFDColoringCreate_MPIAIJ, 1682 0, 1683 MatSetUnfactored_MPIAIJ, 1684 0, 1685 0, 1686 /*60*/ MatGetSubMatrix_MPIAIJ, 1687 MatDestroy_MPIAIJ, 1688 MatView_MPIAIJ, 1689 MatGetPetscMaps_Petsc, 1690 0, 1691 /*65*/ 0, 1692 0, 1693 0, 1694 0, 1695 0, 1696 /*70*/ 0, 1697 0, 1698 MatSetColoring_MPIAIJ, 1699 #if defined(PETSC_HAVE_ADIC) 1700 MatSetValuesAdic_MPIAIJ, 1701 #else 1702 0, 1703 #endif 1704 MatSetValuesAdifor_MPIAIJ, 1705 /*75*/ 0, 1706 0, 1707 0, 1708 0, 1709 0, 1710 /*80*/ 0, 1711 0, 1712 0, 1713 0, 1714 /*84*/ MatLoad_MPIAIJ, 1715 0, 1716 0, 1717 0, 1718 0, 1719 0, 1720 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1721 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1722 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1723 MatPtAP_MPIAIJ_MPIAIJ, 1724 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1725 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1726 0, 1727 0, 1728 0}; 1729 1730 /* ----------------------------------------------------------------------------------------*/ 1731 1732 EXTERN_C_BEGIN 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1735 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1736 { 1737 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1742 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 EXTERN_C_END 1746 1747 EXTERN_C_BEGIN 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1750 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1751 { 1752 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1753 PetscErrorCode ierr; 1754 1755 PetscFunctionBegin; 1756 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1757 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 EXTERN_C_END 1761 1762 #include "petscpc.h" 1763 EXTERN_C_BEGIN 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1766 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1767 { 1768 Mat_MPIAIJ *b; 1769 PetscErrorCode ierr; 1770 PetscInt i; 1771 1772 PetscFunctionBegin; 1773 B->preallocated = PETSC_TRUE; 1774 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1775 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1776 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1777 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1778 if (d_nnz) { 1779 for (i=0; i<B->m; i++) { 1780 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1781 } 1782 } 1783 if (o_nnz) { 1784 for (i=0; i<B->m; i++) { 1785 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1786 } 1787 } 1788 b = (Mat_MPIAIJ*)B->data; 1789 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1790 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1791 1792 PetscFunctionReturn(0); 1793 } 1794 EXTERN_C_END 1795 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1798 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1799 { 1800 Mat mat; 1801 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 *newmat = 0; 1806 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1807 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1808 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1809 a = (Mat_MPIAIJ*)mat->data; 1810 1811 mat->factor = matin->factor; 1812 mat->bs = matin->bs; 1813 mat->assembled = PETSC_TRUE; 1814 mat->insertmode = NOT_SET_VALUES; 1815 mat->preallocated = PETSC_TRUE; 1816 1817 a->rstart = oldmat->rstart; 1818 a->rend = oldmat->rend; 1819 a->cstart = oldmat->cstart; 1820 a->cend = oldmat->cend; 1821 a->size = oldmat->size; 1822 a->rank = oldmat->rank; 1823 a->donotstash = oldmat->donotstash; 1824 a->roworiented = oldmat->roworiented; 1825 a->rowindices = 0; 1826 a->rowvalues = 0; 1827 a->getrowactive = PETSC_FALSE; 1828 1829 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1830 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1831 if (oldmat->colmap) { 1832 #if defined (PETSC_USE_CTABLE) 1833 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1834 #else 1835 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1836 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1837 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1838 #endif 1839 } else a->colmap = 0; 1840 if (oldmat->garray) { 1841 PetscInt len; 1842 len = oldmat->B->n; 1843 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1844 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1845 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1846 } else a->garray = 0; 1847 1848 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1849 PetscLogObjectParent(mat,a->lvec); 1850 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1851 PetscLogObjectParent(mat,a->Mvctx); 1852 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1853 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1854 PetscLogObjectParent(mat,a->A); 1855 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1856 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1857 PetscLogObjectParent(mat,a->B); 1858 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1859 *newmat = mat; 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #include "petscsys.h" 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "MatLoad_MPIAIJ" 1867 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1868 { 1869 Mat A; 1870 PetscScalar *vals,*svals; 1871 MPI_Comm comm = ((PetscObject)viewer)->comm; 1872 MPI_Status status; 1873 PetscErrorCode ierr; 1874 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1875 PetscInt i,nz,j,rstart,rend; 1876 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1877 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1878 PetscInt cend,cstart,n,*rowners; 1879 int fd; 1880 1881 PetscFunctionBegin; 1882 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1883 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1884 if (!rank) { 1885 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1886 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1887 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1888 } 1889 1890 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1891 M = header[1]; N = header[2]; 1892 /* determine ownership of all rows */ 1893 m = M/size + ((M % size) > rank); 1894 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1895 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1896 rowners[0] = 0; 1897 for (i=2; i<=size; i++) { 1898 rowners[i] += rowners[i-1]; 1899 } 1900 rstart = rowners[rank]; 1901 rend = rowners[rank+1]; 1902 1903 /* distribute row lengths to all processors */ 1904 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1905 if (!rank) { 1906 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1907 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1908 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1909 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1910 for (j=0; j<m; j++) { 1911 procsnz[0] += ourlens[j]; 1912 } 1913 for (i=1; i<size; i++) { 1914 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1915 /* calculate the number of nonzeros on each processor */ 1916 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1917 procsnz[i] += rowlengths[j]; 1918 } 1919 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1920 } 1921 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1922 } else { 1923 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1924 } 1925 1926 if (!rank) { 1927 /* determine max buffer needed and allocate it */ 1928 maxnz = 0; 1929 for (i=0; i<size; i++) { 1930 maxnz = PetscMax(maxnz,procsnz[i]); 1931 } 1932 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1933 1934 /* read in my part of the matrix column indices */ 1935 nz = procsnz[0]; 1936 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1937 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1938 1939 /* read in every one elses and ship off */ 1940 for (i=1; i<size; i++) { 1941 nz = procsnz[i]; 1942 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1943 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1944 } 1945 ierr = PetscFree(cols);CHKERRQ(ierr); 1946 } else { 1947 /* determine buffer space needed for message */ 1948 nz = 0; 1949 for (i=0; i<m; i++) { 1950 nz += ourlens[i]; 1951 } 1952 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1953 1954 /* receive message of column indices*/ 1955 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1956 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1957 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1958 } 1959 1960 /* determine column ownership if matrix is not square */ 1961 if (N != M) { 1962 n = N/size + ((N % size) > rank); 1963 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1964 cstart = cend - n; 1965 } else { 1966 cstart = rstart; 1967 cend = rend; 1968 n = cend - cstart; 1969 } 1970 1971 /* loop over local rows, determining number of off diagonal entries */ 1972 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1973 jj = 0; 1974 for (i=0; i<m; i++) { 1975 for (j=0; j<ourlens[i]; j++) { 1976 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1977 jj++; 1978 } 1979 } 1980 1981 /* create our matrix */ 1982 for (i=0; i<m; i++) { 1983 ourlens[i] -= offlens[i]; 1984 } 1985 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 1986 ierr = MatSetType(A,type);CHKERRQ(ierr); 1987 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 1988 1989 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1990 for (i=0; i<m; i++) { 1991 ourlens[i] += offlens[i]; 1992 } 1993 1994 if (!rank) { 1995 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1996 1997 /* read in my part of the matrix numerical values */ 1998 nz = procsnz[0]; 1999 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2000 2001 /* insert into matrix */ 2002 jj = rstart; 2003 smycols = mycols; 2004 svals = vals; 2005 for (i=0; i<m; i++) { 2006 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2007 smycols += ourlens[i]; 2008 svals += ourlens[i]; 2009 jj++; 2010 } 2011 2012 /* read in other processors and ship out */ 2013 for (i=1; i<size; i++) { 2014 nz = procsnz[i]; 2015 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2016 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2017 } 2018 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2019 } else { 2020 /* receive numeric values */ 2021 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2022 2023 /* receive message of values*/ 2024 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2025 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2026 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2027 2028 /* insert into matrix */ 2029 jj = rstart; 2030 smycols = mycols; 2031 svals = vals; 2032 for (i=0; i<m; i++) { 2033 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2034 smycols += ourlens[i]; 2035 svals += ourlens[i]; 2036 jj++; 2037 } 2038 } 2039 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2040 ierr = PetscFree(vals);CHKERRQ(ierr); 2041 ierr = PetscFree(mycols);CHKERRQ(ierr); 2042 ierr = PetscFree(rowners);CHKERRQ(ierr); 2043 2044 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2045 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2046 *newmat = A; 2047 PetscFunctionReturn(0); 2048 } 2049 2050 #undef __FUNCT__ 2051 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2052 /* 2053 Not great since it makes two copies of the submatrix, first an SeqAIJ 2054 in local and then by concatenating the local matrices the end result. 2055 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2056 */ 2057 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2058 { 2059 PetscErrorCode ierr; 2060 PetscMPIInt rank,size; 2061 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2062 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2063 Mat *local,M,Mreuse; 2064 PetscScalar *vwork,*aa; 2065 MPI_Comm comm = mat->comm; 2066 Mat_SeqAIJ *aij; 2067 2068 2069 PetscFunctionBegin; 2070 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2071 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2072 2073 if (call == MAT_REUSE_MATRIX) { 2074 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2075 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2076 local = &Mreuse; 2077 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2078 } else { 2079 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2080 Mreuse = *local; 2081 ierr = PetscFree(local);CHKERRQ(ierr); 2082 } 2083 2084 /* 2085 m - number of local rows 2086 n - number of columns (same on all processors) 2087 rstart - first row in new global matrix generated 2088 */ 2089 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2090 if (call == MAT_INITIAL_MATRIX) { 2091 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2092 ii = aij->i; 2093 jj = aij->j; 2094 2095 /* 2096 Determine the number of non-zeros in the diagonal and off-diagonal 2097 portions of the matrix in order to do correct preallocation 2098 */ 2099 2100 /* first get start and end of "diagonal" columns */ 2101 if (csize == PETSC_DECIDE) { 2102 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2103 if (mglobal == n) { /* square matrix */ 2104 nlocal = m; 2105 } else { 2106 nlocal = n/size + ((n % size) > rank); 2107 } 2108 } else { 2109 nlocal = csize; 2110 } 2111 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2112 rstart = rend - nlocal; 2113 if (rank == size - 1 && rend != n) { 2114 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2115 } 2116 2117 /* next, compute all the lengths */ 2118 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2119 olens = dlens + m; 2120 for (i=0; i<m; i++) { 2121 jend = ii[i+1] - ii[i]; 2122 olen = 0; 2123 dlen = 0; 2124 for (j=0; j<jend; j++) { 2125 if (*jj < rstart || *jj >= rend) olen++; 2126 else dlen++; 2127 jj++; 2128 } 2129 olens[i] = olen; 2130 dlens[i] = dlen; 2131 } 2132 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2133 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2134 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2135 ierr = PetscFree(dlens);CHKERRQ(ierr); 2136 } else { 2137 PetscInt ml,nl; 2138 2139 M = *newmat; 2140 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2141 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2142 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2143 /* 2144 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2145 rather than the slower MatSetValues(). 2146 */ 2147 M->was_assembled = PETSC_TRUE; 2148 M->assembled = PETSC_FALSE; 2149 } 2150 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2151 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2152 ii = aij->i; 2153 jj = aij->j; 2154 aa = aij->a; 2155 for (i=0; i<m; i++) { 2156 row = rstart + i; 2157 nz = ii[i+1] - ii[i]; 2158 cwork = jj; jj += nz; 2159 vwork = aa; aa += nz; 2160 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2161 } 2162 2163 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 *newmat = M; 2166 2167 /* save submatrix used in processor for next request */ 2168 if (call == MAT_INITIAL_MATRIX) { 2169 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2170 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2171 } 2172 2173 PetscFunctionReturn(0); 2174 } 2175 2176 EXTERN_C_BEGIN 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2179 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2180 { 2181 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2182 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2183 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2184 const PetscInt *JJ; 2185 PetscScalar *values; 2186 PetscErrorCode ierr; 2187 2188 PetscFunctionBegin; 2189 #if defined(PETSC_OPT_g) 2190 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2191 #endif 2192 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2193 o_nnz = d_nnz + m; 2194 2195 for (i=0; i<m; i++) { 2196 nnz = I[i+1]- I[i]; 2197 JJ = J + I[i]; 2198 nnz_max = PetscMax(nnz_max,nnz); 2199 #if defined(PETSC_OPT_g) 2200 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2201 #endif 2202 for (j=0; j<nnz; j++) { 2203 if (*JJ >= cstart) break; 2204 JJ++; 2205 } 2206 d = 0; 2207 for (; j<nnz; j++) { 2208 if (*JJ++ >= cend) break; 2209 d++; 2210 } 2211 d_nnz[i] = d; 2212 o_nnz[i] = nnz - d; 2213 } 2214 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2215 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2216 2217 if (v) values = (PetscScalar*)v; 2218 else { 2219 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2220 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2221 } 2222 2223 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2224 for (i=0; i<m; i++) { 2225 ii = i + rstart; 2226 nnz = I[i+1]- I[i]; 2227 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2228 } 2229 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2230 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2231 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2232 2233 if (!v) { 2234 ierr = PetscFree(values);CHKERRQ(ierr); 2235 } 2236 PetscFunctionReturn(0); 2237 } 2238 EXTERN_C_END 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2242 /*@C 2243 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2244 (the default parallel PETSc format). 2245 2246 Collective on MPI_Comm 2247 2248 Input Parameters: 2249 + A - the matrix 2250 . i - the indices into j for the start of each local row (starts with zero) 2251 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2252 - v - optional values in the matrix 2253 2254 Level: developer 2255 2256 .keywords: matrix, aij, compressed row, sparse, parallel 2257 2258 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2259 @*/ 2260 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2261 { 2262 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2263 2264 PetscFunctionBegin; 2265 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2266 if (f) { 2267 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2268 } 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2274 /*@C 2275 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2276 (the default parallel PETSc format). For good matrix assembly performance 2277 the user should preallocate the matrix storage by setting the parameters 2278 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2279 performance can be increased by more than a factor of 50. 2280 2281 Collective on MPI_Comm 2282 2283 Input Parameters: 2284 + A - the matrix 2285 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2286 (same value is used for all local rows) 2287 . d_nnz - array containing the number of nonzeros in the various rows of the 2288 DIAGONAL portion of the local submatrix (possibly different for each row) 2289 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2290 The size of this array is equal to the number of local rows, i.e 'm'. 2291 You must leave room for the diagonal entry even if it is zero. 2292 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2293 submatrix (same value is used for all local rows). 2294 - o_nnz - array containing the number of nonzeros in the various rows of the 2295 OFF-DIAGONAL portion of the local submatrix (possibly different for 2296 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2297 structure. The size of this array is equal to the number 2298 of local rows, i.e 'm'. 2299 2300 If the *_nnz parameter is given then the *_nz parameter is ignored 2301 2302 The AIJ format (also called the Yale sparse matrix format or 2303 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2304 storage. The stored row and column indices begin with zero. See the users manual for details. 2305 2306 The parallel matrix is partitioned such that the first m0 rows belong to 2307 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2308 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2309 2310 The DIAGONAL portion of the local submatrix of a processor can be defined 2311 as the submatrix which is obtained by extraction the part corresponding 2312 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2313 first row that belongs to the processor, and r2 is the last row belonging 2314 to the this processor. This is a square mxm matrix. The remaining portion 2315 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2316 2317 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2318 2319 Example usage: 2320 2321 Consider the following 8x8 matrix with 34 non-zero values, that is 2322 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2323 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2324 as follows: 2325 2326 .vb 2327 1 2 0 | 0 3 0 | 0 4 2328 Proc0 0 5 6 | 7 0 0 | 8 0 2329 9 0 10 | 11 0 0 | 12 0 2330 ------------------------------------- 2331 13 0 14 | 15 16 17 | 0 0 2332 Proc1 0 18 0 | 19 20 21 | 0 0 2333 0 0 0 | 22 23 0 | 24 0 2334 ------------------------------------- 2335 Proc2 25 26 27 | 0 0 28 | 29 0 2336 30 0 0 | 31 32 33 | 0 34 2337 .ve 2338 2339 This can be represented as a collection of submatrices as: 2340 2341 .vb 2342 A B C 2343 D E F 2344 G H I 2345 .ve 2346 2347 Where the submatrices A,B,C are owned by proc0, D,E,F are 2348 owned by proc1, G,H,I are owned by proc2. 2349 2350 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2351 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2352 The 'M','N' parameters are 8,8, and have the same values on all procs. 2353 2354 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2355 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2356 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2357 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2358 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2359 matrix, ans [DF] as another SeqAIJ matrix. 2360 2361 When d_nz, o_nz parameters are specified, d_nz storage elements are 2362 allocated for every row of the local diagonal submatrix, and o_nz 2363 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2364 One way to choose d_nz and o_nz is to use the max nonzerors per local 2365 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2366 In this case, the values of d_nz,o_nz are: 2367 .vb 2368 proc0 : dnz = 2, o_nz = 2 2369 proc1 : dnz = 3, o_nz = 2 2370 proc2 : dnz = 1, o_nz = 4 2371 .ve 2372 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2373 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2374 for proc3. i.e we are using 12+15+10=37 storage locations to store 2375 34 values. 2376 2377 When d_nnz, o_nnz parameters are specified, the storage is specified 2378 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2379 In the above case the values for d_nnz,o_nnz are: 2380 .vb 2381 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2382 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2383 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2384 .ve 2385 Here the space allocated is sum of all the above values i.e 34, and 2386 hence pre-allocation is perfect. 2387 2388 Level: intermediate 2389 2390 .keywords: matrix, aij, compressed row, sparse, parallel 2391 2392 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2393 MPIAIJ 2394 @*/ 2395 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2396 { 2397 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2398 2399 PetscFunctionBegin; 2400 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2401 if (f) { 2402 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2403 } 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatCreateMPIAIJ" 2409 /*@C 2410 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2411 (the default parallel PETSc format). For good matrix assembly performance 2412 the user should preallocate the matrix storage by setting the parameters 2413 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2414 performance can be increased by more than a factor of 50. 2415 2416 Collective on MPI_Comm 2417 2418 Input Parameters: 2419 + comm - MPI communicator 2420 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2421 This value should be the same as the local size used in creating the 2422 y vector for the matrix-vector product y = Ax. 2423 . n - This value should be the same as the local size used in creating the 2424 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2425 calculated if N is given) For square matrices n is almost always m. 2426 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2427 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2428 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2429 (same value is used for all local rows) 2430 . d_nnz - array containing the number of nonzeros in the various rows of the 2431 DIAGONAL portion of the local submatrix (possibly different for each row) 2432 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2433 The size of this array is equal to the number of local rows, i.e 'm'. 2434 You must leave room for the diagonal entry even if it is zero. 2435 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2436 submatrix (same value is used for all local rows). 2437 - o_nnz - array containing the number of nonzeros in the various rows of the 2438 OFF-DIAGONAL portion of the local submatrix (possibly different for 2439 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2440 structure. The size of this array is equal to the number 2441 of local rows, i.e 'm'. 2442 2443 Output Parameter: 2444 . A - the matrix 2445 2446 Notes: 2447 If the *_nnz parameter is given then the *_nz parameter is ignored 2448 2449 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2450 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2451 storage requirements for this matrix. 2452 2453 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2454 processor than it must be used on all processors that share the object for 2455 that argument. 2456 2457 The user MUST specify either the local or global matrix dimensions 2458 (possibly both). 2459 2460 The parallel matrix is partitioned such that the first m0 rows belong to 2461 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2462 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2463 2464 The DIAGONAL portion of the local submatrix of a processor can be defined 2465 as the submatrix which is obtained by extraction the part corresponding 2466 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2467 first row that belongs to the processor, and r2 is the last row belonging 2468 to the this processor. This is a square mxm matrix. The remaining portion 2469 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2470 2471 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2472 2473 When calling this routine with a single process communicator, a matrix of 2474 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2475 type of communicator, use the construction mechanism: 2476 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2477 2478 By default, this format uses inodes (identical nodes) when possible. 2479 We search for consecutive rows with the same nonzero structure, thereby 2480 reusing matrix information to achieve increased efficiency. 2481 2482 Options Database Keys: 2483 + -mat_aij_no_inode - Do not use inodes 2484 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2485 - -mat_aij_oneindex - Internally use indexing starting at 1 2486 rather than 0. Note that when calling MatSetValues(), 2487 the user still MUST index entries starting at 0! 2488 2489 2490 Example usage: 2491 2492 Consider the following 8x8 matrix with 34 non-zero values, that is 2493 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2494 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2495 as follows: 2496 2497 .vb 2498 1 2 0 | 0 3 0 | 0 4 2499 Proc0 0 5 6 | 7 0 0 | 8 0 2500 9 0 10 | 11 0 0 | 12 0 2501 ------------------------------------- 2502 13 0 14 | 15 16 17 | 0 0 2503 Proc1 0 18 0 | 19 20 21 | 0 0 2504 0 0 0 | 22 23 0 | 24 0 2505 ------------------------------------- 2506 Proc2 25 26 27 | 0 0 28 | 29 0 2507 30 0 0 | 31 32 33 | 0 34 2508 .ve 2509 2510 This can be represented as a collection of submatrices as: 2511 2512 .vb 2513 A B C 2514 D E F 2515 G H I 2516 .ve 2517 2518 Where the submatrices A,B,C are owned by proc0, D,E,F are 2519 owned by proc1, G,H,I are owned by proc2. 2520 2521 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2522 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2523 The 'M','N' parameters are 8,8, and have the same values on all procs. 2524 2525 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2526 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2527 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2528 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2529 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2530 matrix, ans [DF] as another SeqAIJ matrix. 2531 2532 When d_nz, o_nz parameters are specified, d_nz storage elements are 2533 allocated for every row of the local diagonal submatrix, and o_nz 2534 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2535 One way to choose d_nz and o_nz is to use the max nonzerors per local 2536 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2537 In this case, the values of d_nz,o_nz are: 2538 .vb 2539 proc0 : dnz = 2, o_nz = 2 2540 proc1 : dnz = 3, o_nz = 2 2541 proc2 : dnz = 1, o_nz = 4 2542 .ve 2543 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2544 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2545 for proc3. i.e we are using 12+15+10=37 storage locations to store 2546 34 values. 2547 2548 When d_nnz, o_nnz parameters are specified, the storage is specified 2549 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2550 In the above case the values for d_nnz,o_nnz are: 2551 .vb 2552 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2553 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2554 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2555 .ve 2556 Here the space allocated is sum of all the above values i.e 34, and 2557 hence pre-allocation is perfect. 2558 2559 Level: intermediate 2560 2561 .keywords: matrix, aij, compressed row, sparse, parallel 2562 2563 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2564 MPIAIJ 2565 @*/ 2566 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2567 { 2568 PetscErrorCode ierr; 2569 PetscMPIInt size; 2570 2571 PetscFunctionBegin; 2572 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2573 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2574 if (size > 1) { 2575 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2576 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2577 } else { 2578 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2579 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2580 } 2581 PetscFunctionReturn(0); 2582 } 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2586 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2587 { 2588 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2589 2590 PetscFunctionBegin; 2591 *Ad = a->A; 2592 *Ao = a->B; 2593 *colmap = a->garray; 2594 PetscFunctionReturn(0); 2595 } 2596 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2599 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2600 { 2601 PetscErrorCode ierr; 2602 PetscInt i; 2603 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2604 2605 PetscFunctionBegin; 2606 if (coloring->ctype == IS_COLORING_LOCAL) { 2607 ISColoringValue *allcolors,*colors; 2608 ISColoring ocoloring; 2609 2610 /* set coloring for diagonal portion */ 2611 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2612 2613 /* set coloring for off-diagonal portion */ 2614 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2615 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2616 for (i=0; i<a->B->n; i++) { 2617 colors[i] = allcolors[a->garray[i]]; 2618 } 2619 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2620 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2621 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2622 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2623 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2624 ISColoringValue *colors; 2625 PetscInt *larray; 2626 ISColoring ocoloring; 2627 2628 /* set coloring for diagonal portion */ 2629 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2630 for (i=0; i<a->A->n; i++) { 2631 larray[i] = i + a->cstart; 2632 } 2633 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2634 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2635 for (i=0; i<a->A->n; i++) { 2636 colors[i] = coloring->colors[larray[i]]; 2637 } 2638 ierr = PetscFree(larray);CHKERRQ(ierr); 2639 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2640 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2641 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2642 2643 /* set coloring for off-diagonal portion */ 2644 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2645 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2646 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2647 for (i=0; i<a->B->n; i++) { 2648 colors[i] = coloring->colors[larray[i]]; 2649 } 2650 ierr = PetscFree(larray);CHKERRQ(ierr); 2651 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2652 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2653 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2654 } else { 2655 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2656 } 2657 2658 PetscFunctionReturn(0); 2659 } 2660 2661 #if defined(PETSC_HAVE_ADIC) 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2664 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2665 { 2666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2667 PetscErrorCode ierr; 2668 2669 PetscFunctionBegin; 2670 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2671 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2672 PetscFunctionReturn(0); 2673 } 2674 #endif 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2678 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2679 { 2680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2681 PetscErrorCode ierr; 2682 2683 PetscFunctionBegin; 2684 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2685 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2686 PetscFunctionReturn(0); 2687 } 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatMerge" 2691 /*@C 2692 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2693 matrices from each processor 2694 2695 Collective on MPI_Comm 2696 2697 Input Parameters: 2698 + comm - the communicators the parallel matrix will live on 2699 . inmat - the input sequential matrices 2700 . n - number of local columns (or PETSC_DECIDE) 2701 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2702 2703 Output Parameter: 2704 . outmat - the parallel matrix generated 2705 2706 Level: advanced 2707 2708 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2709 2710 @*/ 2711 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2712 { 2713 PetscErrorCode ierr; 2714 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2715 PetscInt *indx; 2716 PetscScalar *values; 2717 PetscMap columnmap,rowmap; 2718 2719 PetscFunctionBegin; 2720 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2721 /* 2722 PetscMPIInt rank; 2723 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2724 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2725 */ 2726 if (scall == MAT_INITIAL_MATRIX){ 2727 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2728 if (n == PETSC_DECIDE){ 2729 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2730 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2731 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2732 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2733 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2734 } 2735 2736 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2737 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2738 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2739 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2740 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2741 2742 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2743 for (i=0;i<m;i++) { 2744 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2745 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2746 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2747 } 2748 /* This routine will ONLY return MPIAIJ type matrix */ 2749 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2750 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2751 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2752 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2753 2754 } else if (scall == MAT_REUSE_MATRIX){ 2755 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2756 } else { 2757 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2758 } 2759 2760 for (i=0;i<m;i++) { 2761 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2762 I = i + rstart; 2763 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2764 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2765 } 2766 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2767 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2769 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "MatFileSplit" 2775 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2776 { 2777 PetscErrorCode ierr; 2778 PetscMPIInt rank; 2779 PetscInt m,N,i,rstart,nnz; 2780 size_t len; 2781 const PetscInt *indx; 2782 PetscViewer out; 2783 char *name; 2784 Mat B; 2785 const PetscScalar *values; 2786 2787 PetscFunctionBegin; 2788 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2789 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2790 /* Should this be the type of the diagonal block of A? */ 2791 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2792 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2793 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2794 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2795 for (i=0;i<m;i++) { 2796 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2797 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2798 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2799 } 2800 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2801 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2802 2803 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2804 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2805 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2806 sprintf(name,"%s.%d",outfile,rank); 2807 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2808 ierr = PetscFree(name); 2809 ierr = MatView(B,out);CHKERRQ(ierr); 2810 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2811 ierr = MatDestroy(B);CHKERRQ(ierr); 2812 PetscFunctionReturn(0); 2813 } 2814 2815 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2818 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2819 { 2820 PetscErrorCode ierr; 2821 Mat_Merge_SeqsToMPI *merge; 2822 PetscObjectContainer container; 2823 2824 PetscFunctionBegin; 2825 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2826 if (container) { 2827 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2828 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2829 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2830 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2831 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2832 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2833 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2834 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2835 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2836 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2837 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2838 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2839 2840 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2841 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2842 } 2843 ierr = PetscFree(merge);CHKERRQ(ierr); 2844 2845 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2846 PetscFunctionReturn(0); 2847 } 2848 2849 #include "src/mat/utils/freespace.h" 2850 #include "petscbt.h" 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatMerge_SeqsToMPI" 2853 /*@C 2854 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2855 matrices from each processor 2856 2857 Collective on MPI_Comm 2858 2859 Input Parameters: 2860 + comm - the communicators the parallel matrix will live on 2861 . seqmat - the input sequential matrices 2862 . m - number of local rows (or PETSC_DECIDE) 2863 . n - number of local columns (or PETSC_DECIDE) 2864 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2865 2866 Output Parameter: 2867 . mpimat - the parallel matrix generated 2868 2869 Level: advanced 2870 2871 Notes: 2872 The dimensions of the sequential matrix in each processor MUST be the same. 2873 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2874 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2875 @*/ 2876 static PetscEvent logkey_seqstompinum = 0; 2877 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2878 { 2879 PetscErrorCode ierr; 2880 MPI_Comm comm=mpimat->comm; 2881 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2882 PetscMPIInt size,rank,taga,*len_s; 2883 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2884 PetscInt proc,m; 2885 PetscInt **buf_ri,**buf_rj; 2886 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2887 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2888 MPI_Request *s_waits,*r_waits; 2889 MPI_Status *status; 2890 MatScalar *aa=a->a,**abuf_r,*ba_i; 2891 Mat_Merge_SeqsToMPI *merge; 2892 PetscObjectContainer container; 2893 2894 PetscFunctionBegin; 2895 if (!logkey_seqstompinum) { 2896 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2897 } 2898 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2899 2900 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2901 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2902 2903 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2904 if (container) { 2905 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2906 } 2907 bi = merge->bi; 2908 bj = merge->bj; 2909 buf_ri = merge->buf_ri; 2910 buf_rj = merge->buf_rj; 2911 2912 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2913 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2914 len_s = merge->len_s; 2915 2916 /* send and recv matrix values */ 2917 /*-----------------------------*/ 2918 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2919 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2920 2921 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2922 for (proc=0,k=0; proc<size; proc++){ 2923 if (!len_s[proc]) continue; 2924 i = owners[proc]; 2925 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2926 k++; 2927 } 2928 2929 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2930 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2931 ierr = PetscFree(status);CHKERRQ(ierr); 2932 2933 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2934 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2935 2936 /* insert mat values of mpimat */ 2937 /*----------------------------*/ 2938 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2939 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2940 nextrow = buf_ri_k + merge->nrecv; 2941 nextai = nextrow + merge->nrecv; 2942 2943 for (k=0; k<merge->nrecv; k++){ 2944 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2945 nrows = *(buf_ri_k[k]); 2946 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2947 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2948 } 2949 2950 /* set values of ba */ 2951 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2952 for (i=0; i<m; i++) { 2953 arow = owners[rank] + i; 2954 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2955 bnzi = bi[i+1] - bi[i]; 2956 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2957 2958 /* add local non-zero vals of this proc's seqmat into ba */ 2959 anzi = ai[arow+1] - ai[arow]; 2960 aj = a->j + ai[arow]; 2961 aa = a->a + ai[arow]; 2962 nextaj = 0; 2963 for (j=0; nextaj<anzi; j++){ 2964 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2965 ba_i[j] += aa[nextaj++]; 2966 } 2967 } 2968 2969 /* add received vals into ba */ 2970 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2971 /* i-th row */ 2972 if (i == *nextrow[k]) { 2973 anzi = *(nextai[k]+1) - *nextai[k]; 2974 aj = buf_rj[k] + *(nextai[k]); 2975 aa = abuf_r[k] + *(nextai[k]); 2976 nextaj = 0; 2977 for (j=0; nextaj<anzi; j++){ 2978 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2979 ba_i[j] += aa[nextaj++]; 2980 } 2981 } 2982 nextrow[k]++; nextai[k]++; 2983 } 2984 } 2985 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2986 } 2987 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2988 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2989 2990 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 2991 ierr = PetscFree(ba_i);CHKERRQ(ierr); 2992 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 2993 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2994 PetscFunctionReturn(0); 2995 } 2996 static PetscEvent logkey_seqstompisym = 0; 2997 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 2998 { 2999 PetscErrorCode ierr; 3000 Mat B_mpi; 3001 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3002 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3003 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3004 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3005 PetscInt len,proc,*dnz,*onz; 3006 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3007 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3008 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3009 MPI_Status *status; 3010 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3011 PetscBT lnkbt; 3012 Mat_Merge_SeqsToMPI *merge; 3013 PetscObjectContainer container; 3014 3015 PetscFunctionBegin; 3016 if (!logkey_seqstompisym) { 3017 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3018 } 3019 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3020 3021 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3022 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3023 3024 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3025 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3026 3027 /* determine row ownership */ 3028 /*---------------------------------------------------------*/ 3029 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3030 if (m == PETSC_DECIDE) { 3031 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3032 } else { 3033 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3034 } 3035 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3036 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3037 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3038 3039 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3040 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3041 3042 /* determine the number of messages to send, their lengths */ 3043 /*---------------------------------------------------------*/ 3044 len_s = merge->len_s; 3045 3046 len = 0; /* length of buf_si[] */ 3047 merge->nsend = 0; 3048 for (proc=0; proc<size; proc++){ 3049 len_si[proc] = 0; 3050 if (proc == rank){ 3051 len_s[proc] = 0; 3052 } else { 3053 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3054 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3055 } 3056 if (len_s[proc]) { 3057 merge->nsend++; 3058 nrows = 0; 3059 for (i=owners[proc]; i<owners[proc+1]; i++){ 3060 if (ai[i+1] > ai[i]) nrows++; 3061 } 3062 len_si[proc] = 2*(nrows+1); 3063 len += len_si[proc]; 3064 } 3065 } 3066 3067 /* determine the number and length of messages to receive for ij-structure */ 3068 /*-------------------------------------------------------------------------*/ 3069 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3070 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3071 3072 /* post the Irecv of j-structure */ 3073 /*-------------------------------*/ 3074 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3075 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3076 3077 /* post the Isend of j-structure */ 3078 /*--------------------------------*/ 3079 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3080 sj_waits = si_waits + merge->nsend; 3081 3082 for (proc=0, k=0; proc<size; proc++){ 3083 if (!len_s[proc]) continue; 3084 i = owners[proc]; 3085 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3086 k++; 3087 } 3088 3089 /* receives and sends of j-structure are complete */ 3090 /*------------------------------------------------*/ 3091 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3092 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3093 3094 /* send and recv i-structure */ 3095 /*---------------------------*/ 3096 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3097 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3098 3099 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3100 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3101 for (proc=0,k=0; proc<size; proc++){ 3102 if (!len_s[proc]) continue; 3103 /* form outgoing message for i-structure: 3104 buf_si[0]: nrows to be sent 3105 [1:nrows]: row index (global) 3106 [nrows+1:2*nrows+1]: i-structure index 3107 */ 3108 /*-------------------------------------------*/ 3109 nrows = len_si[proc]/2 - 1; 3110 buf_si_i = buf_si + nrows+1; 3111 buf_si[0] = nrows; 3112 buf_si_i[0] = 0; 3113 nrows = 0; 3114 for (i=owners[proc]; i<owners[proc+1]; i++){ 3115 anzi = ai[i+1] - ai[i]; 3116 if (anzi) { 3117 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3118 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3119 nrows++; 3120 } 3121 } 3122 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3123 k++; 3124 buf_si += len_si[proc]; 3125 } 3126 3127 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3128 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3129 3130 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3131 for (i=0; i<merge->nrecv; i++){ 3132 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3133 } 3134 3135 ierr = PetscFree(len_si);CHKERRQ(ierr); 3136 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3137 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3138 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3139 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3140 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3141 ierr = PetscFree(status);CHKERRQ(ierr); 3142 3143 /* compute a local seq matrix in each processor */ 3144 /*----------------------------------------------*/ 3145 /* allocate bi array and free space for accumulating nonzero column info */ 3146 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3147 bi[0] = 0; 3148 3149 /* create and initialize a linked list */ 3150 nlnk = N+1; 3151 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3152 3153 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3154 len = 0; 3155 len = ai[owners[rank+1]] - ai[owners[rank]]; 3156 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3157 current_space = free_space; 3158 3159 /* determine symbolic info for each local row */ 3160 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3161 nextrow = buf_ri_k + merge->nrecv; 3162 nextai = nextrow + merge->nrecv; 3163 for (k=0; k<merge->nrecv; k++){ 3164 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3165 nrows = *buf_ri_k[k]; 3166 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3167 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3168 } 3169 3170 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3171 len = 0; 3172 for (i=0;i<m;i++) { 3173 bnzi = 0; 3174 /* add local non-zero cols of this proc's seqmat into lnk */ 3175 arow = owners[rank] + i; 3176 anzi = ai[arow+1] - ai[arow]; 3177 aj = a->j + ai[arow]; 3178 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3179 bnzi += nlnk; 3180 /* add received col data into lnk */ 3181 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3182 if (i == *nextrow[k]) { /* i-th row */ 3183 anzi = *(nextai[k]+1) - *nextai[k]; 3184 aj = buf_rj[k] + *nextai[k]; 3185 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3186 bnzi += nlnk; 3187 nextrow[k]++; nextai[k]++; 3188 } 3189 } 3190 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3191 3192 /* if free space is not available, make more free space */ 3193 if (current_space->local_remaining<bnzi) { 3194 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3195 nspacedouble++; 3196 } 3197 /* copy data into free space, then initialize lnk */ 3198 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3199 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3200 3201 current_space->array += bnzi; 3202 current_space->local_used += bnzi; 3203 current_space->local_remaining -= bnzi; 3204 3205 bi[i+1] = bi[i] + bnzi; 3206 } 3207 3208 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3209 3210 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3211 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3212 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3213 3214 /* create symbolic parallel matrix B_mpi */ 3215 /*---------------------------------------*/ 3216 if (n==PETSC_DECIDE) { 3217 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3218 } else { 3219 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3220 } 3221 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3222 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3223 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3224 3225 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3226 B_mpi->assembled = PETSC_FALSE; 3227 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3228 merge->bi = bi; 3229 merge->bj = bj; 3230 merge->buf_ri = buf_ri; 3231 merge->buf_rj = buf_rj; 3232 merge->coi = PETSC_NULL; 3233 merge->coj = PETSC_NULL; 3234 merge->owners_co = PETSC_NULL; 3235 3236 /* attach the supporting struct to B_mpi for reuse */ 3237 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3238 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3239 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3240 *mpimat = B_mpi; 3241 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3242 PetscFunctionReturn(0); 3243 } 3244 3245 static PetscEvent logkey_seqstompi = 0; 3246 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3247 { 3248 PetscErrorCode ierr; 3249 3250 PetscFunctionBegin; 3251 if (!logkey_seqstompi) { 3252 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3253 } 3254 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3255 if (scall == MAT_INITIAL_MATRIX){ 3256 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3257 } 3258 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3259 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3260 PetscFunctionReturn(0); 3261 } 3262 static PetscEvent logkey_getlocalmat = 0; 3263 #undef __FUNCT__ 3264 #define __FUNCT__ "MatGetLocalMat" 3265 /*@C 3266 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3267 3268 Not Collective 3269 3270 Input Parameters: 3271 + A - the matrix 3272 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3273 3274 Output Parameter: 3275 . A_loc - the local sequential matrix generated 3276 3277 Level: developer 3278 3279 @*/ 3280 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3281 { 3282 PetscErrorCode ierr; 3283 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3284 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3285 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3286 PetscScalar *aa=a->a,*ba=b->a,*ca; 3287 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3288 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3289 3290 PetscFunctionBegin; 3291 if (!logkey_getlocalmat) { 3292 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3293 } 3294 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3295 if (scall == MAT_INITIAL_MATRIX){ 3296 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3297 ci[0] = 0; 3298 for (i=0; i<am; i++){ 3299 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3300 } 3301 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3302 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3303 k = 0; 3304 for (i=0; i<am; i++) { 3305 ncols_o = bi[i+1] - bi[i]; 3306 ncols_d = ai[i+1] - ai[i]; 3307 /* off-diagonal portion of A */ 3308 for (jo=0; jo<ncols_o; jo++) { 3309 col = cmap[*bj]; 3310 if (col >= cstart) break; 3311 cj[k] = col; bj++; 3312 ca[k++] = *ba++; 3313 } 3314 /* diagonal portion of A */ 3315 for (j=0; j<ncols_d; j++) { 3316 cj[k] = cstart + *aj++; 3317 ca[k++] = *aa++; 3318 } 3319 /* off-diagonal portion of A */ 3320 for (j=jo; j<ncols_o; j++) { 3321 cj[k] = cmap[*bj++]; 3322 ca[k++] = *ba++; 3323 } 3324 } 3325 /* put together the new matrix */ 3326 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3327 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3328 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3329 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3330 mat->freedata = PETSC_TRUE; 3331 mat->nonew = 0; 3332 } else if (scall == MAT_REUSE_MATRIX){ 3333 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3334 ci = mat->i; cj = mat->j; ca = mat->a; 3335 for (i=0; i<am; i++) { 3336 /* off-diagonal portion of A */ 3337 ncols_o = bi[i+1] - bi[i]; 3338 for (jo=0; jo<ncols_o; jo++) { 3339 col = cmap[*bj]; 3340 if (col >= cstart) break; 3341 *ca++ = *ba++; bj++; 3342 } 3343 /* diagonal portion of A */ 3344 ncols_d = ai[i+1] - ai[i]; 3345 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3346 /* off-diagonal portion of A */ 3347 for (j=jo; j<ncols_o; j++) { 3348 *ca++ = *ba++; bj++; 3349 } 3350 } 3351 } else { 3352 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3353 } 3354 3355 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3356 PetscFunctionReturn(0); 3357 } 3358 3359 static PetscEvent logkey_getlocalmatcondensed = 0; 3360 #undef __FUNCT__ 3361 #define __FUNCT__ "MatGetLocalMatCondensed" 3362 /*@C 3363 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3364 3365 Not Collective 3366 3367 Input Parameters: 3368 + A - the matrix 3369 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3370 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3371 3372 Output Parameter: 3373 . A_loc - the local sequential matrix generated 3374 3375 Level: developer 3376 3377 @*/ 3378 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3379 { 3380 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3381 PetscErrorCode ierr; 3382 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3383 IS isrowa,iscola; 3384 Mat *aloc; 3385 3386 PetscFunctionBegin; 3387 if (!logkey_getlocalmatcondensed) { 3388 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3389 } 3390 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3391 if (!row){ 3392 start = a->rstart; end = a->rend; 3393 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3394 } else { 3395 isrowa = *row; 3396 } 3397 if (!col){ 3398 start = a->cstart; 3399 cmap = a->garray; 3400 nzA = a->A->n; 3401 nzB = a->B->n; 3402 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3403 ncols = 0; 3404 for (i=0; i<nzB; i++) { 3405 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3406 else break; 3407 } 3408 imark = i; 3409 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3410 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3411 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3412 ierr = PetscFree(idx);CHKERRQ(ierr); 3413 } else { 3414 iscola = *col; 3415 } 3416 if (scall != MAT_INITIAL_MATRIX){ 3417 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3418 aloc[0] = *A_loc; 3419 } 3420 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3421 *A_loc = aloc[0]; 3422 ierr = PetscFree(aloc);CHKERRQ(ierr); 3423 if (!row){ 3424 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3425 } 3426 if (!col){ 3427 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3428 } 3429 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3430 PetscFunctionReturn(0); 3431 } 3432 3433 static PetscEvent logkey_GetBrowsOfAcols = 0; 3434 #undef __FUNCT__ 3435 #define __FUNCT__ "MatGetBrowsOfAcols" 3436 /*@C 3437 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3438 3439 Collective on Mat 3440 3441 Input Parameters: 3442 + A,B - the matrices in mpiaij format 3443 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3444 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3445 3446 Output Parameter: 3447 + rowb, colb - index sets of rows and columns of B to extract 3448 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3449 - B_seq - the sequential matrix generated 3450 3451 Level: developer 3452 3453 @*/ 3454 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3455 { 3456 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3457 PetscErrorCode ierr; 3458 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3459 IS isrowb,iscolb; 3460 Mat *bseq; 3461 3462 PetscFunctionBegin; 3463 if (a->cstart != b->rstart || a->cend != b->rend){ 3464 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3465 } 3466 if (!logkey_GetBrowsOfAcols) { 3467 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3468 } 3469 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3470 3471 if (scall == MAT_INITIAL_MATRIX){ 3472 start = a->cstart; 3473 cmap = a->garray; 3474 nzA = a->A->n; 3475 nzB = a->B->n; 3476 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3477 ncols = 0; 3478 for (i=0; i<nzB; i++) { /* row < local row index */ 3479 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3480 else break; 3481 } 3482 imark = i; 3483 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3484 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3485 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3486 ierr = PetscFree(idx);CHKERRQ(ierr); 3487 *brstart = imark; 3488 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3489 } else { 3490 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3491 isrowb = *rowb; iscolb = *colb; 3492 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3493 bseq[0] = *B_seq; 3494 } 3495 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3496 *B_seq = bseq[0]; 3497 ierr = PetscFree(bseq);CHKERRQ(ierr); 3498 if (!rowb){ 3499 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3500 } else { 3501 *rowb = isrowb; 3502 } 3503 if (!colb){ 3504 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3505 } else { 3506 *colb = iscolb; 3507 } 3508 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3509 PetscFunctionReturn(0); 3510 } 3511 3512 static PetscEvent logkey_GetBrowsOfAocols = 0; 3513 #undef __FUNCT__ 3514 #define __FUNCT__ "MatGetBrowsOfAoCols" 3515 /*@C 3516 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3517 of the OFF-DIAGONAL portion of local A 3518 3519 Collective on Mat 3520 3521 Input Parameters: 3522 + A,B - the matrices in mpiaij format 3523 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3524 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3525 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3526 3527 Output Parameter: 3528 + B_oth - the sequential matrix generated 3529 3530 Level: developer 3531 3532 @*/ 3533 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3534 { 3535 VecScatter_MPI_General *gen_to,*gen_from; 3536 PetscErrorCode ierr; 3537 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3538 Mat_SeqAIJ *b_oth; 3539 VecScatter ctx=a->Mvctx; 3540 MPI_Comm comm=ctx->comm; 3541 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3542 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3543 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3544 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3545 MPI_Request *rwaits,*swaits; 3546 MPI_Status *sstatus,rstatus; 3547 PetscInt *cols; 3548 PetscScalar *vals; 3549 PetscMPIInt j; 3550 3551 PetscFunctionBegin; 3552 if (a->cstart != b->rstart || a->cend != b->rend){ 3553 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3554 } 3555 if (!logkey_GetBrowsOfAocols) { 3556 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3557 } 3558 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3559 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3560 3561 gen_to = (VecScatter_MPI_General*)ctx->todata; 3562 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3563 rvalues = gen_from->values; /* holds the length of sending row */ 3564 svalues = gen_to->values; /* holds the length of receiving row */ 3565 nrecvs = gen_from->n; 3566 nsends = gen_to->n; 3567 rwaits = gen_from->requests; 3568 swaits = gen_to->requests; 3569 srow = gen_to->indices; /* local row index to be sent */ 3570 rstarts = gen_from->starts; 3571 sstarts = gen_to->starts; 3572 rprocs = gen_from->procs; 3573 sprocs = gen_to->procs; 3574 sstatus = gen_to->sstatus; 3575 3576 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3577 if (scall == MAT_INITIAL_MATRIX){ 3578 /* i-array */ 3579 /*---------*/ 3580 /* post receives */ 3581 for (i=0; i<nrecvs; i++){ 3582 rowlen = (PetscInt*)rvalues + rstarts[i]; 3583 nrows = rstarts[i+1]-rstarts[i]; 3584 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3585 } 3586 3587 /* pack the outgoing message */ 3588 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3589 rstartsj = sstartsj + nsends +1; 3590 sstartsj[0] = 0; rstartsj[0] = 0; 3591 len = 0; /* total length of j or a array to be sent */ 3592 k = 0; 3593 for (i=0; i<nsends; i++){ 3594 rowlen = (PetscInt*)svalues + sstarts[i]; 3595 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3596 for (j=0; j<nrows; j++) { 3597 row = srow[k] + b->rowners[rank]; /* global row idx */ 3598 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3599 len += rowlen[j]; 3600 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3601 k++; 3602 } 3603 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3604 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3605 } 3606 /* recvs and sends of i-array are completed */ 3607 i = nrecvs; 3608 while (i--) { 3609 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3610 } 3611 if (nsends) { 3612 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3613 } 3614 /* allocate buffers for sending j and a arrays */ 3615 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3616 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3617 3618 /* create i-array of B_oth */ 3619 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3620 b_othi[0] = 0; 3621 len = 0; /* total length of j or a array to be received */ 3622 k = 0; 3623 for (i=0; i<nrecvs; i++){ 3624 rowlen = (PetscInt*)rvalues + rstarts[i]; 3625 nrows = rstarts[i+1]-rstarts[i]; 3626 for (j=0; j<nrows; j++) { 3627 b_othi[k+1] = b_othi[k] + rowlen[j]; 3628 len += rowlen[j]; k++; 3629 } 3630 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3631 } 3632 3633 /* allocate space for j and a arrrays of B_oth */ 3634 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3635 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3636 3637 /* j-array */ 3638 /*---------*/ 3639 /* post receives of j-array */ 3640 for (i=0; i<nrecvs; i++){ 3641 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3642 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3643 } 3644 k = 0; 3645 for (i=0; i<nsends; i++){ 3646 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3647 bufJ = bufj+sstartsj[i]; 3648 for (j=0; j<nrows; j++) { 3649 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3650 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3651 for (l=0; l<ncols; l++){ 3652 *bufJ++ = cols[l]; 3653 } 3654 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3655 } 3656 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3657 } 3658 3659 /* recvs and sends of j-array are completed */ 3660 i = nrecvs; 3661 while (i--) { 3662 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3663 } 3664 if (nsends) { 3665 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3666 } 3667 } else if (scall == MAT_REUSE_MATRIX){ 3668 sstartsj = *startsj; 3669 rstartsj = sstartsj + nsends +1; 3670 bufa = *bufa_ptr; 3671 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3672 b_otha = b_oth->a; 3673 } else { 3674 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3675 } 3676 3677 /* a-array */ 3678 /*---------*/ 3679 /* post receives of a-array */ 3680 for (i=0; i<nrecvs; i++){ 3681 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3682 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3683 } 3684 k = 0; 3685 for (i=0; i<nsends; i++){ 3686 nrows = sstarts[i+1]-sstarts[i]; 3687 bufA = bufa+sstartsj[i]; 3688 for (j=0; j<nrows; j++) { 3689 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3690 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3691 for (l=0; l<ncols; l++){ 3692 *bufA++ = vals[l]; 3693 } 3694 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3695 3696 } 3697 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3698 } 3699 /* recvs and sends of a-array are completed */ 3700 i = nrecvs; 3701 while (i--) { 3702 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3703 } 3704 if (nsends) { 3705 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3706 } 3707 3708 if (scall == MAT_INITIAL_MATRIX){ 3709 /* put together the new matrix */ 3710 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3711 3712 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3713 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3714 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3715 b_oth->freedata = PETSC_TRUE; 3716 b_oth->nonew = 0; 3717 3718 ierr = PetscFree(bufj);CHKERRQ(ierr); 3719 if (!startsj || !bufa_ptr){ 3720 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3721 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3722 } else { 3723 *startsj = sstartsj; 3724 *bufa_ptr = bufa; 3725 } 3726 } 3727 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3728 3729 PetscFunctionReturn(0); 3730 } 3731 3732 /*MC 3733 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3734 3735 Options Database Keys: 3736 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3737 3738 Level: beginner 3739 3740 .seealso: MatCreateMPIAIJ 3741 M*/ 3742 3743 EXTERN_C_BEGIN 3744 #undef __FUNCT__ 3745 #define __FUNCT__ "MatCreate_MPIAIJ" 3746 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3747 { 3748 Mat_MPIAIJ *b; 3749 PetscErrorCode ierr; 3750 PetscInt i; 3751 PetscMPIInt size; 3752 3753 PetscFunctionBegin; 3754 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3755 3756 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3757 B->data = (void*)b; 3758 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3759 B->factor = 0; 3760 B->bs = 1; 3761 B->assembled = PETSC_FALSE; 3762 B->mapping = 0; 3763 3764 B->insertmode = NOT_SET_VALUES; 3765 b->size = size; 3766 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3767 3768 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3769 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3770 3771 /* the information in the maps duplicates the information computed below, eventually 3772 we should remove the duplicate information that is not contained in the maps */ 3773 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3774 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3775 3776 /* build local table of row and column ownerships */ 3777 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3778 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3779 b->cowners = b->rowners + b->size + 2; 3780 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3781 b->rowners[0] = 0; 3782 for (i=2; i<=b->size; i++) { 3783 b->rowners[i] += b->rowners[i-1]; 3784 } 3785 b->rstart = b->rowners[b->rank]; 3786 b->rend = b->rowners[b->rank+1]; 3787 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3788 b->cowners[0] = 0; 3789 for (i=2; i<=b->size; i++) { 3790 b->cowners[i] += b->cowners[i-1]; 3791 } 3792 b->cstart = b->cowners[b->rank]; 3793 b->cend = b->cowners[b->rank+1]; 3794 3795 /* build cache for off array entries formed */ 3796 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3797 b->donotstash = PETSC_FALSE; 3798 b->colmap = 0; 3799 b->garray = 0; 3800 b->roworiented = PETSC_TRUE; 3801 3802 /* stuff used for matrix vector multiply */ 3803 b->lvec = PETSC_NULL; 3804 b->Mvctx = PETSC_NULL; 3805 3806 /* stuff for MatGetRow() */ 3807 b->rowindices = 0; 3808 b->rowvalues = 0; 3809 b->getrowactive = PETSC_FALSE; 3810 3811 /* Explicitly create 2 MATSEQAIJ matrices. */ 3812 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3813 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3814 PetscLogObjectParent(B,b->A); 3815 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3816 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3817 PetscLogObjectParent(B,b->B); 3818 3819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3820 "MatStoreValues_MPIAIJ", 3821 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3822 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3823 "MatRetrieveValues_MPIAIJ", 3824 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3826 "MatGetDiagonalBlock_MPIAIJ", 3827 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3829 "MatIsTranspose_MPIAIJ", 3830 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3831 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3832 "MatMPIAIJSetPreallocation_MPIAIJ", 3833 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3835 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3836 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3837 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3838 "MatDiagonalScaleLocal_MPIAIJ", 3839 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3840 PetscFunctionReturn(0); 3841 } 3842 EXTERN_C_END 3843