#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
  at a slightly higher hash-table cost; without it, it is not scalable
  (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
 \
    if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (PetscInt*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
    for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                       len*sizeof(PetscInt));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                       len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
 \
    ierr = PetscFree(a->a);CHKERRQ(ierr); \
    if (!a->singlemalloc) { \
      ierr = PetscFree(a->i);CHKERRQ(ierr); \
      ierr = PetscFree(a->j);CHKERRQ(ierr); \
    } \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = PETSC_TRUE; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
    PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
    a->maxnz += CHUNKSIZE; \
    a->reallocs++; \
  } \
  N = nrow++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
  rmax = bimax[row]; nrow = bilen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
  if (nonew == 1) goto b_noinsert; \
  else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
 \
    if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (PetscInt*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
    for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                       len*sizeof(PetscInt));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                       len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
 \
    ierr = PetscFree(b->a);CHKERRQ(ierr); \
    if (!b->singlemalloc) { \
      ierr = PetscFree(b->i);CHKERRQ(ierr); \
      ierr = PetscFree(b->j);CHKERRQ(ierr); \
    } \
    ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
    b->singlemalloc = PETSC_TRUE; \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
    PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
    b->maxnz += CHUNKSIZE; \
    b->reallocs++; \
  } \
  N = nrow++ - 1; b->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow; \
}
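
/*
   Illustrative sketch (not compiled): the user-level call path that lands in
   MatSetValues_MPIAIJ() below, which in turn expands the two insertion macros
   above for locally owned entries.  The matrix A and the indices here are
   hypothetical; row/column indices are always global.
*/
#if 0
  PetscInt    row = 3, cols[2] = {3,7};
  PetscScalar vals[2] = {4.0,-1.0};
  ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif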
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  PetscInt    *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt    nonew = a->nonew,shift = 0;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
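
/*
   Sketch of the colmap lookup used above, assuming the PETSC_USE_CTABLE
   branch.  Entries are stored shifted by one so that a return value of 0
   can mean "not found"; gcol is a hypothetical global column index.
*/
#if 0
  PetscInt lcol;
  ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
  lcol--;   /* lcol < 0 means gcol is not (yet) in the off-diagonal part */
#endif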
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
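
/*
   Minimal usage sketch: MatAssemblyBegin_MPIAIJ() above starts the stash
   scatter and MatAssemblyEnd_MPIAIJ() below drains it, so unrelated local
   work may be interleaved to overlap the communication.  A is hypothetical.
*/
#if 0
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* ... unrelated local computation overlapping the stash communication ... */
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif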
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must also
     disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess b because aij->B was changed */
      b = (Mat_SeqAIJ *)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }

  b->inode.use         = PETSC_FALSE;
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
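
/*
   Hedged usage sketch for the row-zeroing path below: each process supplies
   the global rows it wants zeroed (they need not be locally owned).  The
   matrix A and index values are hypothetical.
*/
#if 0
  IS          rows;
  PetscInt    idx[2] = {0,5};
  PetscScalar one = 1.0;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,idx,&rows);CHKERRQ(ierr);
  ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);  /* zero the rows, put 1.0 on the diagonal */
  ierr = ISDestroy(rows);CHKERRQ(ierr);
#endif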
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal, allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
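
/*
   Minimal sketch of the split multiply above: y = A_diag * x_local +
   B_off * x_ghost, with the ghost gather overlapped with the diagonal-block
   product.  comm, n, N, m, M are hypothetical sizes matching A's layout.
*/
#if 0
  Vec x,y;
  ierr = VecCreateMPI(comm,n,N,&x);CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,m,M,&y);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
#endif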
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
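
/*
   Sketch: in this PETSc generation MatScale() takes the scalar first, as in
   MatScale_MPIAIJ() above; both stored blocks are scaled locally so no
   communication is needed.  A is hypothetical.
*/
#if 0
  PetscScalar alpha = 2.0;
  ierr = MatScale(&alpha,A);CHKERRQ(ierr);   /* A <- 2.0 * A */
#endif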
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                   rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                   rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
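
/*
   Hedged sketch of driving the viewers handled above from user code,
   assuming the two-argument PetscViewerSetFormat() of this PETSc generation.
*/
#if 0
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);   /* full ASCII dump */
  ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);   /* summary only */
#endif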
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
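
/*
   For reference: the local SOR sweeps above relax with the diagonal block
   only; each outer iteration first forms bb1 = bb - B*x_ghost so the
   off-processor coupling enters through the right-hand side.  Schematically,

       x^{k+1} = SOR(A_diag, bb - B_off * x^k_ghost, omega)

   which is block Jacobi across processes with SOR applied within each block.
*/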
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
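
/*
   Usage sketch for the row access above: the returned indices are global,
   the row must be locally owned, and each MatGetRow() must be paired with a
   MatRestoreRow().  A is hypothetical.
*/
#if 0
  PetscInt    ncols,*cols,rstart,rend,row;
  PetscScalar *vals;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect ncols, cols[], vals[] ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
#endif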
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
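
/*
   For reference, the reductions above compute
     NORM_FROBENIUS: sqrt( sum_ij |a_ij|^2 )   (MPI_SUM of local sums of squares)
     NORM_1:         max_j sum_i |a_ij|        (column sums summed globally, then max)
     NORM_INFINITY:  max_i sum_j |a_ij|        (local row sums, MPI_MAX)
*/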
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
they aren't compatible for fast copy. */ 1545 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1546 /* because of the column compression in the off-processor part of the matrix a->B, 1547 the number of columns in a->B and b->B may be different, hence we cannot call 1548 the MatCopy() directly on the two parts. If need be, we can provide a more 1549 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1550 then copying the submatrices */ 1551 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1552 } else { 1553 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1554 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1561 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1562 { 1563 PetscErrorCode ierr; 1564 1565 PetscFunctionBegin; 1566 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #include "petscblaslapack.h" 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatAXPY_MPIAIJ" 1573 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1574 { 1575 PetscErrorCode ierr; 1576 PetscInt i; 1577 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1578 PetscBLASInt bnz,one=1; 1579 Mat_SeqAIJ *x,*y; 1580 1581 PetscFunctionBegin; 1582 if (str == SAME_NONZERO_PATTERN) { 1583 x = (Mat_SeqAIJ *)xx->A->data; 1584 y = (Mat_SeqAIJ *)yy->A->data; 1585 bnz = (PetscBLASInt)x->nz; 1586 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1587 x = (Mat_SeqAIJ *)xx->B->data; 1588 y = (Mat_SeqAIJ *)yy->B->data; 1589 bnz = (PetscBLASInt)x->nz; 1590 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1591 } else if (str == SUBSET_NONZERO_PATTERN) { 1592 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1593 1594 x = (Mat_SeqAIJ *)xx->B->data; 1595 y = (Mat_SeqAIJ *)yy->B->data; 1596 if (y->xtoy && y->XtoY != xx->B) { 1597 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1598 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1599 } 1600 if (!y->xtoy) { /* get xtoy */ 1601 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1602 y->XtoY = xx->B; 1603 } 1604 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1605 } else { 1606 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 1611 /* -------------------------------------------------------------------*/ 1612 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1613 MatGetRow_MPIAIJ, 1614 MatRestoreRow_MPIAIJ, 1615 MatMult_MPIAIJ, 1616 /* 4*/ MatMultAdd_MPIAIJ, 1617 MatMultTranspose_MPIAIJ, 1618 MatMultTransposeAdd_MPIAIJ, 1619 0, 1620 0, 1621 0, 1622 /*10*/ 0, 1623 0, 1624 0, 1625 MatRelax_MPIAIJ, 1626 MatTranspose_MPIAIJ, 1627 /*15*/ MatGetInfo_MPIAIJ, 1628 MatEqual_MPIAIJ, 1629 MatGetDiagonal_MPIAIJ, 1630 MatDiagonalScale_MPIAIJ, 1631 MatNorm_MPIAIJ, 1632 /*20*/ MatAssemblyBegin_MPIAIJ, 1633 MatAssemblyEnd_MPIAIJ, 1634 0, 1635 MatSetOption_MPIAIJ, 1636 MatZeroEntries_MPIAIJ, 1637 /*25*/ MatZeroRows_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 0, 1642 /*30*/ MatSetUpPreallocation_MPIAIJ, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*35*/ MatDuplicate_MPIAIJ, 1648 0, 1649 0, 1650 0, 1651 0, 1652 /*40*/ MatAXPY_MPIAIJ, 1653 MatGetSubMatrices_MPIAIJ, 1654 MatIncreaseOverlap_MPIAIJ, 1655 MatGetValues_MPIAIJ, 1656 MatCopy_MPIAIJ, 1657 /*45*/ MatPrintHelp_MPIAIJ, 1658 MatScale_MPIAIJ, 1659 0, 1660 0, 1661 0, 
1662 /*50*/ MatSetBlockSize_MPIAIJ, 1663 0, 1664 0, 1665 0, 1666 0, 1667 /*55*/ MatFDColoringCreate_MPIAIJ, 1668 0, 1669 MatSetUnfactored_MPIAIJ, 1670 0, 1671 0, 1672 /*60*/ MatGetSubMatrix_MPIAIJ, 1673 MatDestroy_MPIAIJ, 1674 MatView_MPIAIJ, 1675 MatGetPetscMaps_Petsc, 1676 0, 1677 /*65*/ 0, 1678 0, 1679 0, 1680 0, 1681 0, 1682 /*70*/ 0, 1683 0, 1684 MatSetColoring_MPIAIJ, 1685 #if defined(PETSC_HAVE_ADIC) 1686 MatSetValuesAdic_MPIAIJ, 1687 #else 1688 0, 1689 #endif 1690 MatSetValuesAdifor_MPIAIJ, 1691 /*75*/ 0, 1692 0, 1693 0, 1694 0, 1695 0, 1696 /*80*/ 0, 1697 0, 1698 0, 1699 0, 1700 /*84*/ MatLoad_MPIAIJ, 1701 0, 1702 0, 1703 0, 1704 0, 1705 0, 1706 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1707 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1708 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1709 MatPtAP_MPIAIJ_MPIAIJ, 1710 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1711 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1712 0, 1713 0, 1714 0}; 1715 1716 /* ----------------------------------------------------------------------------------------*/ 1717 1718 EXTERN_C_BEGIN 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1721 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1722 { 1723 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1728 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 EXTERN_C_END 1732 1733 EXTERN_C_BEGIN 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1736 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1737 { 1738 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1743 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 EXTERN_C_END 1747 1748 #include "petscpc.h" 1749 EXTERN_C_BEGIN 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1752 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1753 { 1754 Mat_MPIAIJ *b; 1755 PetscErrorCode ierr; 1756 PetscInt i; 1757 1758 PetscFunctionBegin; 1759 B->preallocated = PETSC_TRUE; 1760 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1761 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1762 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1763 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1764 if (d_nnz) { 1765 for (i=0; i<B->m; i++) { 1766 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1767 } 1768 } 1769 if (o_nnz) { 1770 for (i=0; i<B->m; i++) { 1771 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1772 } 1773 } 1774 b = (Mat_MPIAIJ*)B->data; 1775 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1776 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1777 1778 PetscFunctionReturn(0); 1779 } 1780 EXTERN_C_END 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1784 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1785 { 1786 Mat mat; 1787 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1788 PetscErrorCode ierr; 1789 1790 PetscFunctionBegin; 1791 *newmat = 0; 1792 ierr = 
MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1793 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1794 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1795 a = (Mat_MPIAIJ*)mat->data; 1796 1797 mat->factor = matin->factor; 1798 mat->bs = matin->bs; 1799 mat->assembled = PETSC_TRUE; 1800 mat->insertmode = NOT_SET_VALUES; 1801 mat->preallocated = PETSC_TRUE; 1802 1803 a->rstart = oldmat->rstart; 1804 a->rend = oldmat->rend; 1805 a->cstart = oldmat->cstart; 1806 a->cend = oldmat->cend; 1807 a->size = oldmat->size; 1808 a->rank = oldmat->rank; 1809 a->donotstash = oldmat->donotstash; 1810 a->roworiented = oldmat->roworiented; 1811 a->rowindices = 0; 1812 a->rowvalues = 0; 1813 a->getrowactive = PETSC_FALSE; 1814 1815 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1816 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1817 if (oldmat->colmap) { 1818 #if defined (PETSC_USE_CTABLE) 1819 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1820 #else 1821 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1822 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1823 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1824 #endif 1825 } else a->colmap = 0; 1826 if (oldmat->garray) { 1827 PetscInt len; 1828 len = oldmat->B->n; 1829 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1830 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1831 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1832 } else a->garray = 0; 1833 1834 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1835 PetscLogObjectParent(mat,a->lvec); 1836 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1837 PetscLogObjectParent(mat,a->Mvctx); 1838 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1839 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1840 PetscLogObjectParent(mat,a->A); 1841 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1842 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1843 PetscLogObjectParent(mat,a->B); 1844 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1845 *newmat = mat; 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #include "petscsys.h" 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatLoad_MPIAIJ" 1853 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1854 { 1855 Mat A; 1856 PetscScalar *vals,*svals; 1857 MPI_Comm comm = ((PetscObject)viewer)->comm; 1858 MPI_Status status; 1859 PetscErrorCode ierr; 1860 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1861 PetscInt i,nz,j,rstart,rend; 1862 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1863 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1864 PetscInt cend,cstart,n,*rowners; 1865 int fd; 1866 1867 PetscFunctionBegin; 1868 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1869 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1870 if (!rank) { 1871 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1872 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1873 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1874 } 1875 1876 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1877 M = header[1]; N = header[2]; 1878 /* determine ownership of all rows */ 1879 m = M/size + ((M % size) > rank); 1880 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1881 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1882 rowners[0] = 0; 1883 for (i=2; i<=size; i++) { 1884 rowners[i] += rowners[i-1]; 1885 } 1886 rstart = rowners[rank]; 1887 rend = rowners[rank+1]; 1888 1889 /* distribute row lengths to all processors */ 1890 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1891 if (!rank) { 1892 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1893 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1894 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1895 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1896 for (j=0; j<m; j++) { 1897 procsnz[0] += ourlens[j]; 1898 } 1899 for (i=1; i<size; i++) { 1900 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1901 /* calculate the number of nonzeros on each processor */ 1902 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1903 procsnz[i] += rowlengths[j]; 1904 } 1905 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1906 } 1907 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1908 } else { 1909 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1910 } 1911 1912 if (!rank) { 1913 /* determine max buffer needed and allocate it */ 1914 maxnz = 0; 1915 for (i=0; i<size; i++) { 1916 maxnz = PetscMax(maxnz,procsnz[i]); 1917 } 1918 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1919 1920 /* read in my part of the matrix column indices */ 1921 nz = procsnz[0]; 1922 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1923 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1924 1925 /* read in everyone else's part and ship it off */ 1926 for (i=1; i<size; i++) { 1927 nz = procsnz[i]; 1928 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1929 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1930 } 1931 ierr = PetscFree(cols);CHKERRQ(ierr); 1932 } else { 1933 /* determine buffer space needed for message */ 1934 nz = 0; 1935 for (i=0; i<m; i++) { 1936 nz += ourlens[i]; 1937 } 1938 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1939 1940 /* receive message of column indices */ 1941 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1942 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1943 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1944 } 1945 1946 /* determine column ownership if matrix is not square */ 1947 if (N != M) { 1948 n = N/size + ((N % size) > rank); 1949 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1950 cstart = cend - n; 1951 } else { 1952 cstart = rstart; 1953 cend = rend; 1954 n = cend - cstart; 1955 } 1956 1957 /* loop over local rows, determining number of off-diagonal entries */ 1958 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1959 jj = 0; 1960 for (i=0; i<m; i++) { 1961 for (j=0; j<ourlens[i]; j++) { 1962 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1963 jj++; 1964 } 1965 } 1966 1967 /* create our matrix */ 1968 for (i=0; i<m; i++) { 1969 ourlens[i] -= offlens[i]; 1970 } 1971 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 1972 ierr = MatSetType(A,type);CHKERRQ(ierr); 1973 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 1974 1975 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1976 for (i=0; i<m; i++) { 1977 ourlens[i]
+= offlens[i]; 1978 } 1979 1980 if (!rank) { 1981 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1982 1983 /* read in my part of the matrix numerical values */ 1984 nz = procsnz[0]; 1985 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1986 1987 /* insert into matrix */ 1988 jj = rstart; 1989 smycols = mycols; 1990 svals = vals; 1991 for (i=0; i<m; i++) { 1992 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1993 smycols += ourlens[i]; 1994 svals += ourlens[i]; 1995 jj++; 1996 } 1997 1998 /* read in the parts for the other processors and ship them out */ 1999 for (i=1; i<size; i++) { 2000 nz = procsnz[i]; 2001 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2002 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2003 } 2004 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2005 } else { 2006 /* receive numeric values */ 2007 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2008 2009 /* receive message of values */ 2010 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2011 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2012 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2013 2014 /* insert into matrix */ 2015 jj = rstart; 2016 smycols = mycols; 2017 svals = vals; 2018 for (i=0; i<m; i++) { 2019 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2020 smycols += ourlens[i]; 2021 svals += ourlens[i]; 2022 jj++; 2023 } 2024 } 2025 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2026 ierr = PetscFree(vals);CHKERRQ(ierr); 2027 ierr = PetscFree(mycols);CHKERRQ(ierr); 2028 ierr = PetscFree(rowners);CHKERRQ(ierr); 2029 2030 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2031 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2032 *newmat = A; 2033 PetscFunctionReturn(0); 2034 } 2035 2036 #undef __FUNCT__ 2037 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2038 /* 2039 Not great since it makes two copies of the submatrix: first a SeqAIJ matrix 2040 locally, and then the final result by concatenating the local matrices.
2041 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2042 */ 2043 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2044 { 2045 PetscErrorCode ierr; 2046 PetscMPIInt rank,size; 2047 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2048 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2049 Mat *local,M,Mreuse; 2050 PetscScalar *vwork,*aa; 2051 MPI_Comm comm = mat->comm; 2052 Mat_SeqAIJ *aij; 2053 2054 2055 PetscFunctionBegin; 2056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2057 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2058 2059 if (call == MAT_REUSE_MATRIX) { 2060 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2061 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2062 local = &Mreuse; 2063 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2064 } else { 2065 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2066 Mreuse = *local; 2067 ierr = PetscFree(local);CHKERRQ(ierr); 2068 } 2069 2070 /* 2071 m - number of local rows 2072 n - number of columns (same on all processors) 2073 rstart - first row in new global matrix generated 2074 */ 2075 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2076 if (call == MAT_INITIAL_MATRIX) { 2077 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2078 ii = aij->i; 2079 jj = aij->j; 2080 2081 /* 2082 Determine the number of non-zeros in the diagonal and off-diagonal 2083 portions of the matrix in order to do correct preallocation 2084 */ 2085 2086 /* first get start and end of "diagonal" columns */ 2087 if (csize == PETSC_DECIDE) { 2088 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2089 if (mglobal == n) { /* square matrix */ 2090 nlocal = m; 2091 } else { 2092 nlocal = n/size + ((n % size) > rank); 2093 } 2094 } else { 2095 nlocal = csize; 2096 } 2097 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2098 rstart = rend - nlocal; 2099 if (rank == size - 1 && rend != n) { 2100 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2101 } 2102 2103 /* next, compute all the lengths */ 2104 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2105 olens = dlens + m; 2106 for (i=0; i<m; i++) { 2107 jend = ii[i+1] - ii[i]; 2108 olen = 0; 2109 dlen = 0; 2110 for (j=0; j<jend; j++) { 2111 if (*jj < rstart || *jj >= rend) olen++; 2112 else dlen++; 2113 jj++; 2114 } 2115 olens[i] = olen; 2116 dlens[i] = dlen; 2117 } 2118 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2119 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2120 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2121 ierr = PetscFree(dlens);CHKERRQ(ierr); 2122 } else { 2123 PetscInt ml,nl; 2124 2125 M = *newmat; 2126 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2127 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2128 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2129 /* 2130 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2131 rather than the slower MatSetValues(). 
2132 */ 2133 M->was_assembled = PETSC_TRUE; 2134 M->assembled = PETSC_FALSE; 2135 } 2136 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2137 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2138 ii = aij->i; 2139 jj = aij->j; 2140 aa = aij->a; 2141 for (i=0; i<m; i++) { 2142 row = rstart + i; 2143 nz = ii[i+1] - ii[i]; 2144 cwork = jj; jj += nz; 2145 vwork = aa; aa += nz; 2146 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2147 } 2148 2149 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2150 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2151 *newmat = M; 2152 2153 /* save submatrix used in processor for next request */ 2154 if (call == MAT_INITIAL_MATRIX) { 2155 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2156 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2157 } 2158 2159 PetscFunctionReturn(0); 2160 } 2161 2162 EXTERN_C_BEGIN 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2165 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2166 { 2167 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2168 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2169 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2170 const PetscInt *JJ; 2171 PetscScalar *values; 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 #if defined(PETSC_OPT_g) 2176 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2177 #endif 2178 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2179 o_nnz = d_nnz + m; 2180 2181 for (i=0; i<m; i++) { 2182 nnz = I[i+1]- I[i]; 2183 JJ = J + I[i]; 2184 nnz_max = PetscMax(nnz_max,nnz); 2185 #if defined(PETSC_OPT_g) 2186 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2187 #endif 2188 for (j=0; j<nnz; j++) { 2189 if (*JJ >= cstart) break; 2190 JJ++; 2191 } 2192 d = 0; 2193 for (; j<nnz; j++) { 2194 if (*JJ++ >= cend) break; 2195 d++; 2196 } 2197 d_nnz[i] = d; 2198 o_nnz[i] = nnz - d; 2199 } 2200 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2201 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2202 2203 if (v) values = (PetscScalar*)v; 2204 else { 2205 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2206 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2207 } 2208 2209 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2210 for (i=0; i<m; i++) { 2211 ii = i + rstart; 2212 nnz = I[i+1]- I[i]; 2213 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2214 } 2215 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2216 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2217 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2218 2219 if (!v) { 2220 ierr = PetscFree(values);CHKERRQ(ierr); 2221 } 2222 PetscFunctionReturn(0); 2223 } 2224 EXTERN_C_END 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2228 /*@C 2229 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2230 (the default parallel PETSc format). 2231 2232 Collective on MPI_Comm 2233 2234 Input Parameters: 2235 + A - the matrix 2236 . i - the indices into j for the start of each local row (starts with zero) 2237 . 
j - the column indices for each local row (starts with zero); these must be sorted within each row 2238 - v - optional values in the matrix 2239 2240 Level: developer 2241 2242 .keywords: matrix, aij, compressed row, sparse, parallel 2243 2244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2245 @*/ 2246 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2247 { 2248 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2249 2250 PetscFunctionBegin; 2251 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2252 if (f) { 2253 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2254 } 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2260 /*@C 2261 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2262 (the default parallel PETSc format). For good matrix assembly performance 2263 the user should preallocate the matrix storage by setting the parameters 2264 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2265 performance can be increased by more than a factor of 50. 2266 2267 Collective on MPI_Comm 2268 2269 Input Parameters: 2270 + B - the matrix 2271 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2272 (same value is used for all local rows) 2273 . d_nnz - array containing the number of nonzeros in the various rows of the 2274 DIAGONAL portion of the local submatrix (possibly different for each row) 2275 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2276 The size of this array is equal to the number of local rows, i.e., 'm'. 2277 You must leave room for the diagonal entry even if it is zero. 2278 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2279 submatrix (same value is used for all local rows). 2280 - o_nnz - array containing the number of nonzeros in the various rows of the 2281 OFF-DIAGONAL portion of the local submatrix (possibly different for 2282 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2283 structure. The size of this array is equal to the number 2284 of local rows, i.e., 'm'. 2285 2286 If the *_nnz parameter is given then the *_nz parameter is ignored. 2287 2288 The AIJ format (also called the Yale sparse matrix format or 2289 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2290 storage. The stored row and column indices begin with zero. See the users manual for details. 2291 2292 The parallel matrix is partitioned such that the first m0 rows belong to 2293 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2294 to process 2 etc., where m0,m1,m2,... are the input parameter 'm'. 2295 2296 The DIAGONAL portion of the local submatrix of a processor can be defined 2297 as the submatrix which is obtained by extracting the part corresponding 2298 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2299 first row that belongs to the processor, and r2 is the last row belonging 2300 to this processor. This is a square mxm matrix. The remaining portion 2301 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2302 2303 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
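A typical calling sequence is sketched below (a sketch only, with error checking elided; the d_nnz/o_nnz arrays, when used, must be set up by the caller as illustrated under "Example usage" below):

.vb
      MatCreate(comm,m,n,M,N,&B);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve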
2304 2305 Example usage: 2306 2307 Consider the following 8x8 matrix with 34 non-zero values, that is 2308 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2309 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2310 as follows: 2311 2312 .vb
2313             1  2  0  |  0  3  0  |  0  4
2314    Proc0    0  5  6  |  7  0  0  |  8  0
2315             9  0 10  | 11  0  0  | 12  0
2316    -------------------------------------
2317            13  0 14  | 15 16 17  |  0  0
2318    Proc1    0 18  0  | 19 20 21  |  0  0
2319             0  0  0  | 22 23  0  | 24  0
2320    -------------------------------------
2321    Proc2   25 26 27  |  0  0 28  | 29  0
2322            30  0  0  | 31 32 33  |  0 34
2323 .ve
2324 2325 This can be represented as a collection of submatrices as: 2326 2327 .vb
2328       A B C
2329       D E F
2330       G H I
2331 .ve
2332 2333 Here the submatrices A,B,C are owned by proc0, D,E,F are 2334 owned by proc1, and G,H,I are owned by proc2. 2335 2336 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2337 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2338 The 'M','N' parameters are 8,8, and have the same values on all procs. 2339 2340 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2341 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2342 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2343 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2344 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 2345 matrix, and [DF] as another SeqAIJ matrix. 2346 2347 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2348 allocated for every row of the local diagonal submatrix, and o_nz 2349 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2350 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 2351 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2352 In this case, the values of d_nz,o_nz are: 2353 .vb
2354       proc0 : d_nz = 2, o_nz = 2
2355       proc1 : d_nz = 3, o_nz = 2
2356       proc2 : d_nz = 1, o_nz = 4
2357 .ve
2358 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2359 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2360 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2361 34 values. 2362 2363 When the d_nnz, o_nnz parameters are specified, the storage is specified 2364 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2365 In the above case the values for d_nnz,o_nnz are: 2366 .vb
2367       proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2368       proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2369       proc2: d_nnz = [1,1] and o_nnz = [4,4]
2370 .ve
2371 Here the space allocated is the sum of all the above values, i.e., 34, and 2372 hence the preallocation is perfect.
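For instance, proc0 of the example above would make the preallocation call roughly as follows (a sketch; each processor passes its own arrays for the matrix B being preallocated):

.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* the proc0 values above */
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve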
2373 2374 Level: intermediate 2375 2376 .keywords: matrix, aij, compressed row, sparse, parallel 2377 2378 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2379 MPIAIJ 2380 @*/ 2381 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2382 { 2383 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2384 2385 PetscFunctionBegin; 2386 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2387 if (f) { 2388 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatCreateMPIAIJ" 2395 /*@C 2396 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2397 (the default parallel PETSc format). For good matrix assembly performance 2398 the user should preallocate the matrix storage by setting the parameters 2399 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2400 performance can be increased by more than a factor of 50. 2401 2402 Collective on MPI_Comm 2403 2404 Input Parameters: 2405 + comm - MPI communicator 2406 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2407 This value should be the same as the local size used in creating the 2408 y vector for the matrix-vector product y = Ax. 2409 . n - This value should be the same as the local size used in creating the 2410 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2411 it calculated if N is given) For square matrices n is almost always m. 2412 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2413 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2414 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2415 (same value is used for all local rows) 2416 . d_nnz - array containing the number of nonzeros in the various rows of the 2417 DIAGONAL portion of the local submatrix (possibly different for each row) 2418 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2419 The size of this array is equal to the number of local rows, i.e., 'm'. 2420 You must leave room for the diagonal entry even if it is zero. 2421 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2422 submatrix (same value is used for all local rows). 2423 - o_nnz - array containing the number of nonzeros in the various rows of the 2424 OFF-DIAGONAL portion of the local submatrix (possibly different for 2425 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2426 structure. The size of this array is equal to the number 2427 of local rows, i.e., 'm'. 2428 2429 Output Parameter: 2430 . A - the matrix 2431 2432 Notes: 2433 If the *_nnz parameter is given then the *_nz parameter is ignored 2434 2435 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2436 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2437 storage requirements for this matrix. 2438 2439 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2440 processor then it must be used on all processors that share the object for 2441 that argument. 2442 2443 The user MUST specify either the local or global matrix dimensions 2444 (possibly both).
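For example, the following two calls set up the same global layout (a sketch with error checking elided, assuming the local sizes m,n sum across the communicator to the global sizes M,N; the 5 and 2 simply mirror the default d_nz and o_nz used by MatMPIAIJSetPreallocation_MPIAIJ()):

.vb
      MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
      MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve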
2445 2446 The parallel matrix is partitioned such that the first m0 rows belong to 2447 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2448 to process 2 etc., where m0,m1,m2,... are the input parameter 'm'. 2449 2450 The DIAGONAL portion of the local submatrix of a processor can be defined 2451 as the submatrix which is obtained by extracting the part corresponding 2452 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2453 first row that belongs to the processor, and r2 is the last row belonging 2454 to this processor. This is a square mxm matrix. The remaining portion 2455 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2456 2457 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2458 2459 When calling this routine with a single process communicator, a matrix of 2460 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2461 type of communicator, use the construction mechanism: 2462 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2463 2464 By default, this format uses inodes (identical nodes) when possible. 2465 We search for consecutive rows with the same nonzero structure, thereby 2466 reusing matrix information to achieve increased efficiency. 2467 2468 Options Database Keys: 2469 + -mat_aij_no_inode - Do not use inodes 2470 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2471 - -mat_aij_oneindex - Internally use indexing starting at 1 2472 rather than 0. Note that when calling MatSetValues(), 2473 the user still MUST index entries starting at 0! 2474 2475 2476 Example usage: 2477 2478 Consider the following 8x8 matrix with 34 non-zero values, that is 2479 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2480 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2481 as follows: 2482 2483 .vb
2484             1  2  0  |  0  3  0  |  0  4
2485    Proc0    0  5  6  |  7  0  0  |  8  0
2486             9  0 10  | 11  0  0  | 12  0
2487    -------------------------------------
2488            13  0 14  | 15 16 17  |  0  0
2489    Proc1    0 18  0  | 19 20 21  |  0  0
2490             0  0  0  | 22 23  0  | 24  0
2491    -------------------------------------
2492    Proc2   25 26 27  |  0  0 28  | 29  0
2493            30  0  0  | 31 32 33  |  0 34
2494 .ve
2495 2496 This can be represented as a collection of submatrices as: 2497 2498 .vb
2499       A B C
2500       D E F
2501       G H I
2502 .ve
2503 2504 Here the submatrices A,B,C are owned by proc0, D,E,F are 2505 owned by proc1, and G,H,I are owned by proc2. 2506 2507 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2508 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2509 The 'M','N' parameters are 8,8, and have the same values on all procs. 2510 2511 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2512 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2513 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2514 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2515 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 2516 matrix, and [DF] as another SeqAIJ matrix. 2517 2518 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2519 allocated for every row of the local diagonal submatrix, and o_nz 2520 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2521 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 2522 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2523 In this case, the values of d_nz,o_nz are: 2524 .vb
2525       proc0 : d_nz = 2, o_nz = 2
2526       proc1 : d_nz = 3, o_nz = 2
2527       proc2 : d_nz = 1, o_nz = 4
2528 .ve
2529 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2530 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2531 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2532 34 values. 2533 2534 When the d_nnz, o_nnz parameters are specified, the storage is specified 2535 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2536 In the above case the values for d_nnz,o_nnz are: 2537 .vb
2538       proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2539       proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2540       proc2: d_nnz = [1,1] and o_nnz = [4,4]
2541 .ve
2542 Here the space allocated is the sum of all the above values, i.e., 34, and 2543 hence the preallocation is perfect. 2544 2545 Level: intermediate 2546 2547 .keywords: matrix, aij, compressed row, sparse, parallel 2548 2549 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2550 MPIAIJ 2551 @*/ 2552 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2553 { 2554 PetscErrorCode ierr; 2555 PetscMPIInt size; 2556 2557 PetscFunctionBegin; 2558 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2559 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2560 if (size > 1) { 2561 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2562 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2563 } else { 2564 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2565 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2566 } 2567 PetscFunctionReturn(0); 2568 } 2569 2570 #undef __FUNCT__ 2571 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2572 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2573 { 2574 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2575 2576 PetscFunctionBegin; 2577 *Ad = a->A; 2578 *Ao = a->B; 2579 *colmap = a->garray; 2580 PetscFunctionReturn(0); 2581 } 2582 2583 #undef __FUNCT__ 2584 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2585 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2586 { 2587 PetscErrorCode ierr; 2588 PetscInt i; 2589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2590 2591 PetscFunctionBegin; 2592 if (coloring->ctype == IS_COLORING_LOCAL) { 2593 ISColoringValue *allcolors,*colors; 2594 ISColoring ocoloring; 2595 2596 /* set coloring for diagonal portion */ 2597 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2598 2599 /* set coloring for off-diagonal portion */ 2600 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2601 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2602 for (i=0; i<a->B->n; i++) { 2603 colors[i] = allcolors[a->garray[i]]; 2604 } 2605 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2606 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2607 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2608 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2609 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2610 ISColoringValue *colors; 2611 PetscInt *larray; 2612 ISColoring ocoloring; 2613 2614 /* set coloring for diagonal portion */ 2615 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2616 for (i=0; i<a->A->n; i++) { 2617 larray[i] = i +
a->cstart; 2618 } 2619 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2620 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2621 for (i=0; i<a->A->n; i++) { 2622 colors[i] = coloring->colors[larray[i]]; 2623 } 2624 ierr = PetscFree(larray);CHKERRQ(ierr); 2625 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2626 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2627 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2628 2629 /* set coloring for off-diagonal portion */ 2630 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2631 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2632 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2633 for (i=0; i<a->B->n; i++) { 2634 colors[i] = coloring->colors[larray[i]]; 2635 } 2636 ierr = PetscFree(larray);CHKERRQ(ierr); 2637 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2638 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2639 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2640 } else { 2641 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2642 } 2643 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #if defined(PETSC_HAVE_ADIC) 2648 #undef __FUNCT__ 2649 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2650 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2651 { 2652 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2653 PetscErrorCode ierr; 2654 2655 PetscFunctionBegin; 2656 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2657 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2658 PetscFunctionReturn(0); 2659 } 2660 #endif 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2664 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2665 { 2666 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2667 PetscErrorCode ierr; 2668 2669 PetscFunctionBegin; 2670 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2671 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2672 PetscFunctionReturn(0); 2673 } 2674 2675 #undef __FUNCT__ 2676 #define __FUNCT__ "MatMerge" 2677 /*@C 2678 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2679 matrices from each processor 2680 2681 Collective on MPI_Comm 2682 2683 Input Parameters: 2684 + comm - the communicator the parallel matrix will live on 2685 . inmat - the input sequential matrix (one per process) 2686 . n - number of local columns (or PETSC_DECIDE) 2687 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2688 2689 Output Parameter: 2690 . outmat - the parallel matrix generated 2691 2692 Level: advanced 2693 2694 Notes: The number of columns of the matrix in EACH processor MUST be the same.
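A minimal usage sketch (each process passes its own sequential matrix, all with the same number of columns; PETSC_DECIDE lets PETSc choose the local column size):

.vb
      MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve

Note that inmat is destroyed by this routine in the course of assembling outmat.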
2695 2696 @*/ 2697 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2698 { 2699 PetscErrorCode ierr; 2700 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2701 PetscInt *indx; 2702 PetscScalar *values; 2703 PetscMap columnmap,rowmap; 2704 2705 PetscFunctionBegin; 2706 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2707 /* 2708 PetscMPIInt rank; 2709 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2710 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2711 */ 2712 if (scall == MAT_INITIAL_MATRIX){ 2713 /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */ 2714 if (n == PETSC_DECIDE){ 2715 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2716 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2717 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2718 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2719 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2720 } 2721 2722 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2723 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2724 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2725 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2726 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2727 2728 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2729 for (i=0;i<m;i++) { 2730 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2731 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2732 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2733 } 2734 /* This routine will ONLY return an MPIAIJ type matrix */ 2735 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2736 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2737 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2738 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2739 2740 } else if (scall == MAT_REUSE_MATRIX){ 2741 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2742 } else { 2743 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2744 } 2745 2746 for (i=0;i<m;i++) { 2747 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2748 I = i + rstart; 2749 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2750 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2751 } 2752 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2753 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2754 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 2756 PetscFunctionReturn(0); 2757 } 2758 2759 #undef __FUNCT__ 2760 #define __FUNCT__ "MatFileSplit" 2761 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2762 { 2763 PetscErrorCode ierr; 2764 PetscMPIInt rank; 2765 PetscInt m,N,i,rstart,nnz; 2766 size_t len; 2767 const PetscInt *indx; 2768 PetscViewer out; 2769 char *name; 2770 Mat B; 2771 const PetscScalar *values; 2772 2773 PetscFunctionBegin; 2774 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2775 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2776 /* Should this be the type of the diagonal block of A?
*/ 2777 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2778 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2779 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2780 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2781 for (i=0;i<m;i++) { 2782 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2783 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2784 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2785 } 2786 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2787 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 2789 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2790 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2791 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2792 sprintf(name,"%s.%d",outfile,rank); 2793 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2794 ierr = PetscFree(name);CHKERRQ(ierr); 2795 ierr = MatView(B,out);CHKERRQ(ierr); 2796 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2797 ierr = MatDestroy(B);CHKERRQ(ierr); 2798 PetscFunctionReturn(0); 2799 } 2800 2801 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2804 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2805 { 2806 PetscErrorCode ierr; 2807 Mat_Merge_SeqsToMPI *merge; 2808 PetscObjectContainer container; 2809 2810 PetscFunctionBegin; 2811 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2812 if (container) { 2813 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2814 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2815 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2816 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2817 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2818 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2819 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2820 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2821 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2822 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2823 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2824 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2825 2826 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2827 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2828 /* merge is only valid when the container exists, so it must be freed here */ 2829 ierr = PetscFree(merge);CHKERRQ(ierr); 2830 } 2831 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 #include "src/mat/utils/freespace.h" 2836 #include "petscbt.h" 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "MatMerge_SeqsToMPI" 2839 /*@C 2840 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential 2841 matrices from each processor 2842 2843 Collective on MPI_Comm 2844 2845 Input Parameters: 2846 + comm - the communicator the parallel matrix will live on 2847 . seqmat - the input sequential matrices 2848 . m - number of local rows (or PETSC_DECIDE) 2849 . n - number of local columns (or PETSC_DECIDE) 2850 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2851 2852 Output Parameter: 2853 . mpimat - the parallel matrix generated 2854 2855 Level: advanced 2856 2857 Notes: 2858 The dimensions of the sequential matrix in each processor MUST be the same. 2859 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 2860 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
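A minimal usage sketch (assuming seqmat has identical dimensions on every process in comm):

.vb
      MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve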
2861 @*/ 2862 static PetscEvent logkey_seqstompinum = 0; 2863 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2864 { 2865 PetscErrorCode ierr; 2866 MPI_Comm comm=mpimat->comm; 2867 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2868 PetscMPIInt size,rank,taga,*len_s; 2869 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2870 PetscInt proc,m; 2871 PetscInt **buf_ri,**buf_rj; 2872 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2873 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2874 MPI_Request *s_waits,*r_waits; 2875 MPI_Status *status; 2876 MatScalar *aa=a->a,**abuf_r,*ba_i; 2877 Mat_Merge_SeqsToMPI *merge; 2878 PetscObjectContainer container; 2879 2880 PetscFunctionBegin; 2881 if (!logkey_seqstompinum) { 2882 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2883 } 2884 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2885 2886 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2888 2889 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2890 if (container) { 2891 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2892 } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must have been created by MatMerge_SeqsToMPISymbolic()"); 2893 bi = merge->bi; 2894 bj = merge->bj; 2895 buf_ri = merge->buf_ri; 2896 buf_rj = merge->buf_rj; 2897 2898 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2899 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2900 len_s = merge->len_s; 2901 2902 /* send and recv matrix values */ 2903 /*-----------------------------*/ 2904 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2905 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2906 2907 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2908 for (proc=0,k=0; proc<size; proc++){ 2909 if (!len_s[proc]) continue; 2910 i = owners[proc]; 2911 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2912 k++; 2913 } 2914 2915 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2916 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2917 ierr = PetscFree(status);CHKERRQ(ierr); 2918 2919 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2920 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2921 2922 /* insert mat values of mpimat */ 2923 /*----------------------------*/ 2924 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2925 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2926 nextrow = buf_ri_k + merge->nrecv; 2927 nextai = nextrow + merge->nrecv; 2928 2929 for (k=0; k<merge->nrecv; k++){ 2930 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 2931 nrows = *(buf_ri_k[k]); 2932 nextrow[k] = buf_ri_k[k]+1; /* next row number of the k-th received i-structure */ 2933 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */ 2934 } 2935 2936 /* set values of ba */ 2937 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2938 for (i=0; i<m; i++) { 2939 arow = owners[rank] + i; 2940 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2941 bnzi = bi[i+1] - bi[i]; 2942 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2943 2944 /* add local non-zero vals of this proc's seqmat into ba */ 2945 anzi = ai[arow+1] - ai[arow]; 2946 aj = a->j + ai[arow]; 2947 aa = a->a + ai[arow]; 2948 nextaj = 0; 2949 for
(j=0; nextaj<anzi; j++){ 2950 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2951 ba_i[j] += aa[nextaj++]; 2952 } 2953 } 2954 2955 /* add received vals into ba */ 2956 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2957 /* i-th row */ 2958 if (i == *nextrow[k]) { 2959 anzi = *(nextai[k]+1) - *nextai[k]; 2960 aj = buf_rj[k] + *(nextai[k]); 2961 aa = abuf_r[k] + *(nextai[k]); 2962 nextaj = 0; 2963 for (j=0; nextaj<anzi; j++){ 2964 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2965 ba_i[j] += aa[nextaj++]; 2966 } 2967 } 2968 nextrow[k]++; nextai[k]++; 2969 } 2970 } 2971 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2972 } 2973 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2974 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2975 2976 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 2977 ierr = PetscFree(ba_i);CHKERRQ(ierr); 2978 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 2979 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 static PetscEvent logkey_seqstompisym = 0; 2983 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 2984 { 2985 PetscErrorCode ierr; 2986 Mat B_mpi; 2987 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2988 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 2989 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 2990 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 2991 PetscInt len,proc,*dnz,*onz; 2992 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 2993 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 2994 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 2995 MPI_Status *status; 2996 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2997 PetscBT lnkbt; 2998 Mat_Merge_SeqsToMPI *merge; 2999 PetscObjectContainer container; 3000 3001 PetscFunctionBegin; 3002 if (!logkey_seqstompisym) { 3003 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3004 } 3005 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3006 3007 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3008 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3009 3010 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3011 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3012 3013 /* determine row ownership */ 3014 /*---------------------------------------------------------*/ 3015 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3016 if (m == PETSC_DECIDE) { 3017 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3018 } else { 3019 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3020 } 3021 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3022 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3023 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3024 3025 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3026 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3027 3028 /* determine the number of messages to send, their lengths */ 3029 /*---------------------------------------------------------*/ 3030 len_s = merge->len_s; 3031 3032 len = 0; /* length of buf_si[] */ 3033 merge->nsend = 0; 3034 for (proc=0; proc<size; proc++){ 3035 len_si[proc] = 0; 3036 if (proc == rank){ 3037 len_s[proc] = 0; 3038 } else { 3039 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3040 
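/* a provisional length for the i-structure message to [proc]; recomputed below as 2*(nrows+1) once the rows containing nonzeros have been counted */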
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3041 } 3042 if (len_s[proc]) { 3043 merge->nsend++; 3044 nrows = 0; 3045 for (i=owners[proc]; i<owners[proc+1]; i++){ 3046 if (ai[i+1] > ai[i]) nrows++; 3047 } 3048 len_si[proc] = 2*(nrows+1); 3049 len += len_si[proc]; 3050 } 3051 } 3052 3053 /* determine the number and length of messages to receive for ij-structure */ 3054 /*-------------------------------------------------------------------------*/ 3055 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3056 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3057 3058 /* post the Irecv of j-structure */ 3059 /*-------------------------------*/ 3060 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3061 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3062 3063 /* post the Isend of j-structure */ 3064 /*--------------------------------*/ 3065 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3066 sj_waits = si_waits + merge->nsend; 3067 3068 for (proc=0, k=0; proc<size; proc++){ 3069 if (!len_s[proc]) continue; 3070 i = owners[proc]; 3071 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3072 k++; 3073 } 3074 3075 /* receives and sends of j-structure are complete */ 3076 /*------------------------------------------------*/ 3077 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3078 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3079 3080 /* send and recv i-structure */ 3081 /*---------------------------*/ 3082 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3083 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3084 3085 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3086 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3087 for (proc=0,k=0; proc<size; proc++){ 3088 if (!len_s[proc]) continue; 3089 /* form outgoing message for i-structure: 3090 buf_si[0]: nrows to be sent 3091 [1:nrows]: row index (global) 3092 [nrows+1:2*nrows+1]: i-structure index 3093 */ 3094 /*-------------------------------------------*/ 3095 nrows = len_si[proc]/2 - 1; 3096 buf_si_i = buf_si + nrows+1; 3097 buf_si[0] = nrows; 3098 buf_si_i[0] = 0; 3099 nrows = 0; 3100 for (i=owners[proc]; i<owners[proc+1]; i++){ 3101 anzi = ai[i+1] - ai[i]; 3102 if (anzi) { 3103 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3104 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3105 nrows++; 3106 } 3107 } 3108 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3109 k++; 3110 buf_si += len_si[proc]; 3111 } 3112 3113 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3114 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3115 3116 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3117 for (i=0; i<merge->nrecv; i++){ 3118 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3119 } 3120 3121 ierr = PetscFree(len_si);CHKERRQ(ierr); 3122 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3123 ierr = 
3123 ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3124 ierr = PetscFree(si_waits);CHKERRQ(ierr);
3125 ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3126 ierr = PetscFree(buf_s);CHKERRQ(ierr);
3127 ierr = PetscFree(status);CHKERRQ(ierr);
3128
3129 /* compute a local seq matrix in each processor */
3130 /*----------------------------------------------*/
3131 /* allocate bi array and free space for accumulating nonzero column info */
3132 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3133 bi[0] = 0;
3134
3135 /* create and initialize a linked list */
3136 nlnk = N+1;
3137 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3138
3139 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3141 len = ai[owners[rank+1]] - ai[owners[rank]];
3142 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3143 current_space = free_space;
3144
3145 /* determine symbolic info for each local row */
3146 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt*),&buf_ri_k);CHKERRQ(ierr);
3147 nextrow = buf_ri_k + merge->nrecv;
3148 nextai = nextrow + merge->nrecv;
3149 for (k=0; k<merge->nrecv; k++){
3150 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3151 nrows = *buf_ri_k[k];
3152 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
3153 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3154 }
3155
3156 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3157 len = 0;
3158 for (i=0;i<m;i++) {
3159 bnzi = 0;
3160 /* add local non-zero cols of this proc's seqmat into lnk */
3161 arow = owners[rank] + i;
3162 anzi = ai[arow+1] - ai[arow];
3163 aj = a->j + ai[arow];
3164 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3165 bnzi += nlnk;
3166 /* add received col data into lnk */
3167 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3168 if (i == *nextrow[k]) { /* i-th row */
3169 anzi = *(nextai[k]+1) - *nextai[k];
3170 aj = buf_rj[k] + *nextai[k];
3171 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3172 bnzi += nlnk;
3173 nextrow[k]++; nextai[k]++;
3174 }
3175 }
3176 if (len < bnzi) len = bnzi; /* =max(bnzi) */
3177
3178 /* if free space is not available, make more free space */
3179 if (current_space->local_remaining<bnzi) {
3180 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3181 nspacedouble++;
3182 }
3183 /* copy data into free space, then initialize lnk */
3184 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3185 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3186
3187 current_space->array += bnzi;
3188 current_space->local_used += bnzi;
3189 current_space->local_remaining -= bnzi;
3190
3191 bi[i+1] = bi[i] + bnzi;
3192 }
3193
3194 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3195
3196 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3197 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3198 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3199
3200 /* create symbolic parallel matrix B_mpi */
3201 /*---------------------------------------*/
3202 if (n==PETSC_DECIDE) {
3203 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3204 } else {
3205 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3206 }
3207 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3208 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
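/* dnz/onz were allocated by MatPreallocateInitialize() and filled row-by-row with
   MatPreallocateSet() above; they have now been consumed by
   MatMPIAIJSetPreallocation(), so MatPreallocateFinalize() below only releases them. */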
3209 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3210
3211 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3212 B_mpi->assembled = PETSC_FALSE;
3213 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
3214 merge->bi = bi;
3215 merge->bj = bj;
3216 merge->buf_ri = buf_ri;
3217 merge->buf_rj = buf_rj;
3218 merge->coi = PETSC_NULL;
3219 merge->coj = PETSC_NULL;
3220 merge->owners_co = PETSC_NULL;
3221
3222 /* attach the supporting struct to B_mpi for reuse */
3223 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3224 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3225 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3226 *mpimat = B_mpi;
3227 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3228 PetscFunctionReturn(0);
3229 }
3230
3231 static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
3232 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3233 {
3234 PetscErrorCode ierr;
3235
3236 PetscFunctionBegin;
3237 if (!logkey_seqstompi) {
3238 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3239 }
3240 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3241 if (scall == MAT_INITIAL_MATRIX){
3242 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3243 }
3244 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3245 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3246 PetscFunctionReturn(0);
3247 }
3248 static PetscEvent logkey_getlocalmat = 0;
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatGetLocalMat"
3251 /*@C
3252 MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of the parallel matrix
3253
3254 Not Collective
3255
3256 Input Parameters:
3257 + A - the matrix
3258 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3259
3260 Output Parameter:
3261 . A_loc - the local sequential matrix generated
3262
3263 Level: developer
3264
3265 @*/
3266 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3267 {
3268 PetscErrorCode ierr;
3269 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
3270 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3271 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3272 PetscScalar *aa=a->a,*ba=b->a,*ca;
3273 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart;
3274 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
3275
3276 PetscFunctionBegin;
3277 if (!logkey_getlocalmat) {
3278 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3279 }
3280 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3281 if (scall == MAT_INITIAL_MATRIX){
3282 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3283 ci[0] = 0;
3284 for (i=0; i<am; i++){
3285 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3286 }
3287 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3288 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3289 k = 0;
3290 for (i=0; i<am; i++) {
3291 ncols_o = bi[i+1] - bi[i];
3292 ncols_d = ai[i+1] - ai[i];
3293 /* off-diagonal portion of A (global cols < cstart) */
3294 for (jo=0; jo<ncols_o; jo++) {
3295 col = cmap[*bj];
3296 if (col >= cstart) break;
3297 cj[k] = col; bj++;
3298 ca[k++] = *ba++;
3299 }
3300 /* diagonal portion of A */
3301 for (j=0; j<ncols_d; j++) {
3302 cj[k] = cstart + *aj++;
3303 ca[k++] = *aa++;
3304 }
3305 /* off-diagonal portion of A (global cols >= cend) */
3306 for (j=jo; j<ncols_o; j++) {
3307 cj[k] = cmap[*bj++];
3308 ca[k++] = *ba++;
3309 }
3310 }
3311 /* put together the new matrix */
3312 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3313 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3314 /* Since these are PETSc arrays, change flags to free them as necessary. */
3315 mat = (Mat_SeqAIJ*)(*A_loc)->data;
3316 mat->freedata = PETSC_TRUE;
3317 mat->nonew = 0;
3318 } else if (scall == MAT_REUSE_MATRIX){
3319 mat=(Mat_SeqAIJ*)(*A_loc)->data;
3320 ci = mat->i; cj = mat->j; ca = mat->a;
3321 for (i=0; i<am; i++) {
3322 /* off-diagonal portion of A (global cols < cstart) */
3323 ncols_o = bi[i+1] - bi[i];
3324 for (jo=0; jo<ncols_o; jo++) {
3325 col = cmap[*bj];
3326 if (col >= cstart) break;
3327 *ca++ = *ba++; bj++;
3328 }
3329 /* diagonal portion of A */
3330 ncols_d = ai[i+1] - ai[i];
3331 for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3332 /* off-diagonal portion of A (global cols >= cend) */
3333 for (j=jo; j<ncols_o; j++) {
3334 *ca++ = *ba++; bj++;
3335 }
3336 }
3337 } else {
3338 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3339 }
3340
3341 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3342 PetscFunctionReturn(0);
3343 }
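/*
   A minimal usage sketch for MatGetLocalMat() (illustrative only; error handling
   abbreviated). The MAT_REUSE_MATRIX call assumes the nonzero pattern of A has
   not changed since the MAT_INITIAL_MATRIX call:

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ...  A_loc holds this process's rows of A, with global column indices  ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/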
3344
3345 static PetscEvent logkey_getlocalmatcondensed = 0;
3346 #undef __FUNCT__
3347 #define __FUNCT__ "MatGetLocalMatCondensed"
3348 /*@C
3349 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and the NON-ZERO columns of A
3350
3351 Not Collective
3352
3353 Input Parameters:
3354 + A - the matrix
3355 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3356 - row, col - index sets of rows and columns to extract (or PETSC_NULL)
3357
3358 Output Parameter:
3359 . A_loc - the local sequential matrix generated
3360
3361 Level: developer
3362
3363 @*/
3364 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3365 {
3366 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
3367 PetscErrorCode ierr;
3368 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3369 IS isrowa,iscola;
3370 Mat *aloc;
3371
3372 PetscFunctionBegin;
3373 if (!logkey_getlocalmatcondensed) {
3374 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3375 }
3376 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3377 if (!row){
3378 start = a->rstart; end = a->rend;
3379 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3380 } else {
3381 isrowa = *row;
3382 }
3383 if (!col){
3384 start = a->cstart;
3385 cmap = a->garray;
3386 nzA = a->A->n;
3387 nzB = a->B->n;
3388 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3389 ncols = 0;
3390 for (i=0; i<nzB; i++) {
3391 if (cmap[i] < start) idx[ncols++] = cmap[i];
3392 else break;
3393 }
3394 imark = i;
3395 for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3396 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3397 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3398 ierr = PetscFree(idx);CHKERRQ(ierr);
3399 } else {
3400 iscola = *col;
3401 }
3402 if (scall != MAT_INITIAL_MATRIX){
3403 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3404 aloc[0] = *A_loc;
3405 }
3406 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3407 *A_loc = aloc[0];
3408 ierr = PetscFree(aloc);CHKERRQ(ierr);
3409 if (!row){
3410 ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3411 }
3412 if (!col){
3413 ierr = ISDestroy(iscola);CHKERRQ(ierr);
3414 }
3415 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3416 PetscFunctionReturn(0);
3417 }
3418
3419 static PetscEvent logkey_GetBrowsOfAcols = 0;
3420 #undef __FUNCT__
3421 #define __FUNCT__ "MatGetBrowsOfAcols"
3422 /*@C
3423 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3424
3425 Collective on Mat
3426
3427 Input Parameters:
3428 + A,B - the matrices in mpiaij format
3429 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3430 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3431
3432 Output Parameters:
3433 + rowb, colb - index sets of rows and columns of B to extract
3434 . brstart - row index of B_seq at which B's own local rows begin (the following B->m rows of B_seq come from B's local rows)
3435 - B_seq - the sequential matrix generated
3436
3437 Level: developer
3438
3439 @*/
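/*
   Typical call pattern (a sketch; variable names are illustrative). The index sets
   created by the first call are passed back in when the values of B change but its
   nonzero pattern does not:

     Mat B_seq; IS rowb,colb; PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/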
3440 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3441 {
3442 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3443 PetscErrorCode ierr;
3444 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3445 IS isrowb,iscolb;
3446 Mat *bseq;
3447
3448 PetscFunctionBegin;
3449 if (a->cstart != b->rstart || a->cend != b->rend){
3450 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3451 }
3452 if (!logkey_GetBrowsOfAcols) {
3453 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3454 }
3455 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3456
3457 if (scall == MAT_INITIAL_MATRIX){
3458 start = a->cstart;
3459 cmap = a->garray;
3460 nzA = a->A->n;
3461 nzB = a->B->n;
3462 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3463 ncols = 0;
3464 for (i=0; i<nzB; i++) { /* row < local row index */
3465 if (cmap[i] < start) idx[ncols++] = cmap[i];
3466 else break;
3467 }
3468 imark = i;
3469 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
3470 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3471 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3472 ierr = PetscFree(idx);CHKERRQ(ierr);
3473 *brstart = imark;
3474 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3475 } else {
3476 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3477 isrowb = *rowb; iscolb = *colb;
3478 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3479 bseq[0] = *B_seq;
3480 }
3481 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3482 *B_seq = bseq[0];
3483 ierr = PetscFree(bseq);CHKERRQ(ierr);
3484 if (!rowb){
3485 ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3486 } else {
3487 *rowb = isrowb;
3488 }
3489 if (!colb){
3490 ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3491 } else {
3492 *colb = iscolb;
3493 }
3494 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3495 PetscFunctionReturn(0);
3496 }
3497
3498 static PetscEvent logkey_GetBrowsOfAocols = 0;
3499 #undef __FUNCT__
3500 #define __FUNCT__ "MatGetBrowsOfAoCols"
3501 /*@C
3502 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3503 of the OFF-DIAGONAL portion of local A
3504
3505 Collective on Mat
3506
3507 Input Parameters:
3508 + A,B - the matrices in mpiaij format
3509 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3510 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3511 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3512
3513 Output Parameter:
3514 . B_oth - the sequential matrix generated
3515
3516 Level: developer
3517
3518 @*/
3519 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3520 {
3521 VecScatter_MPI_General *gen_to,*gen_from;
3522 PetscErrorCode ierr;
3523 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3524 Mat_SeqAIJ *b_oth;
3525 VecScatter ctx=a->Mvctx;
3526 MPI_Comm comm=ctx->comm;
3527 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank;
3528 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3529 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA;
3530 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3531 MPI_Request *rwaits,*swaits;
3532 MPI_Status *sstatus,rstatus;
3533 PetscInt *cols;
3534 PetscScalar *vals;
3535 PetscMPIInt j;
3536
3537 PetscFunctionBegin;
3538 if (a->cstart != b->rstart || a->cend != b->rend){
3539 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3540 }
3541 if (!logkey_GetBrowsOfAocols) {
3542 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3543 }
3544 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3545 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3546
3547 gen_to = (VecScatter_MPI_General*)ctx->todata;
3548 gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3549 rvalues = gen_from->values; /* holds the lengths of the received rows */
3550 svalues = gen_to->values; /* holds the lengths of the rows to be sent */
3551 nrecvs = gen_from->n;
3552 nsends = gen_to->n;
3553 rwaits = gen_from->requests;
3554 swaits = gen_to->requests;
3555 srow = gen_to->indices; /* local row index to be sent */
3556 rstarts = gen_from->starts;
3557 sstarts = gen_to->starts;
3558 rprocs = gen_from->procs;
3559 sprocs = gen_to->procs;
3560 sstatus = gen_to->sstatus;
3561
3562 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3563 if (scall == MAT_INITIAL_MATRIX){
3564 /* i-array */
3565 /*---------*/
3566 /* post receives */
3567 for (i=0; i<nrecvs; i++){
3568 rowlen = (PetscInt*)rvalues + rstarts[i];
3569 nrows = rstarts[i+1]-rstarts[i];
3570 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3571 }
3572
3573 /* pack the outgoing message */
3574 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3575 rstartsj = sstartsj + nsends +1;
3576 sstartsj[0] = 0; rstartsj[0] = 0;
3577 len = 0; /* total length of j or a array to be sent */
3578 k = 0;
3579 for (i=0; i<nsends; i++){
3580 rowlen = (PetscInt*)svalues + sstarts[i];
3581 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3582 for (j=0; j<nrows; j++) {
3583 row = srow[k] + b->rowners[rank]; /* global row idx */
3584 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3585 len += rowlen[j];
3586 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3587 k++;
3588 }
3589 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3590 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3591 }
3592 /* recvs and sends of i-array are completed */
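/* At this point sstartsj[] holds the cumulative j/a-lengths of the outgoing
   messages; e.g. with nsends=2 and per-message nonzero counts 5 and 4,
   sstartsj = {0,5,9}, so message i occupies bufj[sstartsj[i]..sstartsj[i+1]-1]
   (and later bufa likewise). */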
3593 i = nrecvs;
3594 while (i--) {
3595 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3596 }
3597 if (nsends) {
3598 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3599 }
3600 /* allocate buffers for sending j and a arrays */
3601 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3602 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3603
3604 /* create i-array of B_oth */
3605 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3606 b_othi[0] = 0;
3607 len = 0; /* total length of j or a array to be received */
3608 k = 0;
3609 for (i=0; i<nrecvs; i++){
3610 rowlen = (PetscInt*)rvalues + rstarts[i];
3611 nrows = rstarts[i+1]-rstarts[i];
3612 for (j=0; j<nrows; j++) {
3613 b_othi[k+1] = b_othi[k] + rowlen[j];
3614 len += rowlen[j]; k++;
3615 }
3616 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3617 }
3618
3619 /* allocate space for j and a arrays of B_oth */
3620 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3621 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3622
3623 /* j-array */
3624 /*---------*/
3625 /* post receives of j-array */
3626 for (i=0; i<nrecvs; i++){
3627 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3628 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3629 }
3630 k = 0;
3631 for (i=0; i<nsends; i++){
3632 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3633 bufJ = bufj+sstartsj[i];
3634 for (j=0; j<nrows; j++) {
3635 row = srow[k++] + b->rowners[rank]; /* global row idx */
3636 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3637 for (l=0; l<ncols; l++){
3638 *bufJ++ = cols[l];
3639 }
3640 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3641 }
3642 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3643 }
3644
3645 /* recvs and sends of j-array are completed */
3646 i = nrecvs;
3647 while (i--) {
3648 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3649 }
3650 if (nsends) {
3651 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3652 }
3653 } else if (scall == MAT_REUSE_MATRIX){
3654 sstartsj = *startsj;
3655 rstartsj = sstartsj + nsends +1;
3656 bufa = *bufa_ptr;
3657 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
3658 b_otha = b_oth->a;
3659 } else {
3660 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3661 }
3662
3663 /* a-array */
3664 /*---------*/
3665 /* post receives of a-array */
3666 for (i=0; i<nrecvs; i++){
3667 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3668 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3669 }
3670 k = 0;
3671 for (i=0; i<nsends; i++){
3672 nrows = sstarts[i+1]-sstarts[i];
3673 bufA = bufa+sstartsj[i];
3674 for (j=0; j<nrows; j++) {
3675 row = srow[k++] + b->rowners[rank]; /* global row idx */
3676 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3677 for (l=0; l<ncols; l++){
3678 *bufA++ = vals[l];
3679 }
3680 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3682 }
3683 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3684 }
3685 /* recvs and sends of a-array are completed */
3686 i = nrecvs;
3687 while (i--) {
3688 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3689 }
3690 if (nsends) {
3691 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3692 } 3693 3694 if (scall == MAT_INITIAL_MATRIX){ 3695 /* put together the new matrix */ 3696 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3697 3698 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3699 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3700 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3701 b_oth->freedata = PETSC_TRUE; 3702 b_oth->nonew = 0; 3703 3704 ierr = PetscFree(bufj);CHKERRQ(ierr); 3705 if (!startsj || !bufa_ptr){ 3706 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3707 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3708 } else { 3709 *startsj = sstartsj; 3710 *bufa_ptr = bufa; 3711 } 3712 } 3713 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3714 3715 PetscFunctionReturn(0); 3716 } 3717 3718 /*MC 3719 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3720 3721 Options Database Keys: 3722 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3723 3724 Level: beginner 3725 3726 .seealso: MatCreateMPIAIJ 3727 M*/ 3728 3729 EXTERN_C_BEGIN 3730 #undef __FUNCT__ 3731 #define __FUNCT__ "MatCreate_MPIAIJ" 3732 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3733 { 3734 Mat_MPIAIJ *b; 3735 PetscErrorCode ierr; 3736 PetscInt i; 3737 PetscMPIInt size; 3738 3739 PetscFunctionBegin; 3740 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3741 3742 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3743 B->data = (void*)b; 3744 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3745 B->factor = 0; 3746 B->bs = 1; 3747 B->assembled = PETSC_FALSE; 3748 B->mapping = 0; 3749 3750 B->insertmode = NOT_SET_VALUES; 3751 b->size = size; 3752 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3753 3754 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3755 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3756 3757 /* the information in the maps duplicates the information computed below, eventually 3758 we should remove the duplicate information that is not contained in the maps */ 3759 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3760 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3761 3762 /* build local table of row and column ownerships */ 3763 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3764 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3765 b->cowners = b->rowners + b->size + 2; 3766 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3767 b->rowners[0] = 0; 3768 for (i=2; i<=b->size; i++) { 3769 b->rowners[i] += b->rowners[i-1]; 3770 } 3771 b->rstart = b->rowners[b->rank]; 3772 b->rend = b->rowners[b->rank+1]; 3773 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3774 b->cowners[0] = 0; 3775 for (i=2; i<=b->size; i++) { 3776 b->cowners[i] += b->cowners[i-1]; 3777 } 3778 b->cstart = b->cowners[b->rank]; 3779 b->cend = b->cowners[b->rank+1]; 3780 3781 /* build cache for off array entries formed */ 3782 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3783 b->donotstash = PETSC_FALSE; 3784 b->colmap = 0; 3785 b->garray = 0; 3786 b->roworiented = PETSC_TRUE; 3787 3788 /* stuff used for matrix vector multiply */ 3789 b->lvec = PETSC_NULL; 3790 b->Mvctx = PETSC_NULL; 
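/* lvec and Mvctx are created lazily: they are built when the matrix is first
   assembled and the scatter needed by MatMult() is set up (at assembly time,
   in MatSetUpMultiply_MPIAIJ()). */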
3791 3792 /* stuff for MatGetRow() */ 3793 b->rowindices = 0; 3794 b->rowvalues = 0; 3795 b->getrowactive = PETSC_FALSE; 3796 3797 /* Explicitly create 2 MATSEQAIJ matrices. */ 3798 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3799 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3800 PetscLogObjectParent(B,b->A); 3801 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3802 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3803 PetscLogObjectParent(B,b->B); 3804 3805 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3806 "MatStoreValues_MPIAIJ", 3807 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3808 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3809 "MatRetrieveValues_MPIAIJ", 3810 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3811 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3812 "MatGetDiagonalBlock_MPIAIJ", 3813 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3814 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3815 "MatIsTranspose_MPIAIJ", 3816 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3817 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3818 "MatMPIAIJSetPreallocation_MPIAIJ", 3819 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3820 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3821 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3822 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3823 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3824 "MatDiagonalScaleLocal_MPIAIJ", 3825 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3826 PetscFunctionReturn(0); 3827 } 3828 EXTERN_C_END 3829
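/*
   Example of creating a MATMPIAIJ matrix (a minimal sketch using this version's
   MatCreate() calling sequence; the sizes M,N and the preallocation values are
   illustrative only):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ...  set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd()  ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/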