#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
    \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
      \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
      \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
      \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
      \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
    \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
      \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
      \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
      \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
      \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

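/*
   Descriptive note (added for clarity): both macros above perform the same
   insertion into a sequential AIJ (CSR) block.  A short binary search narrows
   the window, a linear scan locates the column, existing entries are added to
   or overwritten according to addv, and when a row is full the whole block is
   reallocated with CHUNKSIZE extra slots for that row (unless the nonew flags
   forbid new nonzeros).  For example, inserting column 7 into a row currently
   holding columns {2,5,9} shifts the entry for column 9 one slot to the right
   and stores the new value in the freed slot.  The A variant works on the
   diagonal block, the B variant on the off-diagonal block.
*/
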
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt       nonew = a->nonew,shift = 0;
  PetscScalar    *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
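  /*
     Descriptive note (added for clarity): the MPI_Allreduce with MPI_PROD
     below yields other_disassembled == 0 exactly when at least one process
     has was_assembled == 0, i.e. some process has disassembled or was never
     assembled; in that case this process disassembles as well so the
     off-diagonal blocks can be rebuilt consistently everywhere.
  */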
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess the b because aij->B was changed */
      b = (Mat_SeqAIJ *)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
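  /*
     Descriptive note (added for clarity): starts[] is filled twice on
     purpose; the packing loop below advances starts[owner[i]] as it copies
     row indices into svalues, so the prefix sums are rebuilt afterwards to
     recover the send offsets used by MPI_Isend().
  */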
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
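  /*
     Descriptive note (added for clarity): the binary file written below has
     the layout
       header[4]      = {MAT_FILE_COOKIE, M, N, global nz}
       row lengths    : one PetscInt per global row
       column indices : global nz PetscInts, row by row, in increasing column order
       values         : global nz PetscScalars in the same order.
     Rank 0 writes its own data first and then receives and writes each
     remaining rank's block in rank order.
  */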
  nz = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}


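/*
   Descriptive note (added for clarity): MatRelax_MPIAIJ below implements only
   the "local" SOR variants.  Each outer iteration gathers the off-process part
   of x into mat->lvec, forms a modified right hand side bb1 = bb - B*x_offproc,
   and runs the sequential SOR kernel of the diagonal block on bb1.
   Schematically (a sketch of the loop body, not the literal code):

     VecScatterBegin/End(xx -> mat->lvec);
     VecScale(-1, mat->lvec);
     bb1 = bb + B*lvec;                          (i.e. bb - B*x_offproc)
     relax(mat->A, bb1, omega, <local sweep>, fshift, lits, xx);

   A true parallel SOR, with coupling across processes within a sweep, is not
   supported and is rejected with an error.
*/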
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
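  /*
     Descriptive note (added for clarity): isend[] now holds the local
     statistics summed over the diagonal (A) and off-diagonal (B) blocks;
     depending on the requested MatInfoType they are returned as-is
     (MAT_LOCAL) or reduced over the communicator with MPI_MAX or MPI_SUM.
  */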
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;
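  /*
     Descriptive note (added for clarity): the row returned by this routine
     is the merge, in increasing global column order, of the entries of the
     off-diagonal block B whose global column is < cstart, then all entries
     of the diagonal block A (shifted by cstart), then the remaining entries
     of B; this relies on A and B each keeping their column indices sorted
     within a row.
  */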

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
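  /*
     Descriptive note (added for clarity): in the in-place case
     (matout == PETSC_NULL) the transposed matrix B replaces A below through
     MatHeaderCopy(), which moves B's data into A's header and destroys the
     old contents of A.
  */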
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,PetscInt *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
*/ 1536 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1537 /* because of the column compression in the off-processor part of the matrix a->B, 1538 the number of columns in a->B and b->B may be different, hence we cannot call 1539 the MatCopy() directly on the two parts. If need be, we can provide a more 1540 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1541 then copying the submatrices */ 1542 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1543 } else { 1544 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1545 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1546 } 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1552 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1553 { 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #include "petscblaslapack.h" 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatAXPY_MPIAIJ" 1564 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1565 { 1566 PetscErrorCode ierr; 1567 PetscInt i; 1568 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1569 PetscBLASInt bnz,one=1; 1570 Mat_SeqAIJ *x,*y; 1571 1572 PetscFunctionBegin; 1573 if (str == SAME_NONZERO_PATTERN) { 1574 x = (Mat_SeqAIJ *)xx->A->data; 1575 y = (Mat_SeqAIJ *)yy->A->data; 1576 bnz = (PetscBLASInt)x->nz; 1577 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1578 x = (Mat_SeqAIJ *)xx->B->data; 1579 y = (Mat_SeqAIJ *)yy->B->data; 1580 bnz = (PetscBLASInt)x->nz; 1581 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1582 } else if (str == SUBSET_NONZERO_PATTERN) { 1583 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1584 1585 x = (Mat_SeqAIJ *)xx->B->data; 1586 y = (Mat_SeqAIJ *)yy->B->data; 1587 if (y->xtoy && y->XtoY != xx->B) { 1588 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1589 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1590 } 1591 if (!y->xtoy) { /* get xtoy */ 1592 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1593 y->XtoY = xx->B; 1594 } 1595 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1596 } else { 1597 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 /* -------------------------------------------------------------------*/ 1603 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1604 MatGetRow_MPIAIJ, 1605 MatRestoreRow_MPIAIJ, 1606 MatMult_MPIAIJ, 1607 /* 4*/ MatMultAdd_MPIAIJ, 1608 MatMultTranspose_MPIAIJ, 1609 MatMultTransposeAdd_MPIAIJ, 1610 0, 1611 0, 1612 0, 1613 /*10*/ 0, 1614 0, 1615 0, 1616 MatRelax_MPIAIJ, 1617 MatTranspose_MPIAIJ, 1618 /*15*/ MatGetInfo_MPIAIJ, 1619 MatEqual_MPIAIJ, 1620 MatGetDiagonal_MPIAIJ, 1621 MatDiagonalScale_MPIAIJ, 1622 MatNorm_MPIAIJ, 1623 /*20*/ MatAssemblyBegin_MPIAIJ, 1624 MatAssemblyEnd_MPIAIJ, 1625 0, 1626 MatSetOption_MPIAIJ, 1627 MatZeroEntries_MPIAIJ, 1628 /*25*/ MatZeroRows_MPIAIJ, 1629 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_64BIT_INT) 1630 MatLUFactorSymbolic_MPIAIJ_TFS, 1631 #else 1632 0, 1633 #endif 1634 0, 1635 0, 1636 0, 1637 /*30*/ MatSetUpPreallocation_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 0, 1642 /*35*/ MatDuplicate_MPIAIJ, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*40*/ MatAXPY_MPIAIJ, 1648 MatGetSubMatrices_MPIAIJ, 1649 MatIncreaseOverlap_MPIAIJ, 1650 
MatGetValues_MPIAIJ, 1651 MatCopy_MPIAIJ, 1652 /*45*/ MatPrintHelp_MPIAIJ, 1653 MatScale_MPIAIJ, 1654 0, 1655 0, 1656 0, 1657 /*50*/ MatGetBlockSize_MPIAIJ, 1658 0, 1659 0, 1660 0, 1661 0, 1662 /*55*/ MatFDColoringCreate_MPIAIJ, 1663 0, 1664 MatSetUnfactored_MPIAIJ, 1665 0, 1666 0, 1667 /*60*/ MatGetSubMatrix_MPIAIJ, 1668 MatDestroy_MPIAIJ, 1669 MatView_MPIAIJ, 1670 MatGetPetscMaps_Petsc, 1671 0, 1672 /*65*/ 0, 1673 0, 1674 0, 1675 0, 1676 0, 1677 /*70*/ 0, 1678 0, 1679 MatSetColoring_MPIAIJ, 1680 MatSetValuesAdic_MPIAIJ, 1681 MatSetValuesAdifor_MPIAIJ, 1682 /*75*/ 0, 1683 0, 1684 0, 1685 0, 1686 0, 1687 /*80*/ 0, 1688 0, 1689 0, 1690 0, 1691 /*84*/ MatLoad_MPIAIJ, 1692 0, 1693 0, 1694 0, 1695 0, 1696 0, 1697 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1698 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1699 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1700 MatPtAP_MPIAIJ_MPIAIJ, 1701 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1702 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1703 0, 1704 0, 1705 0}; 1706 1707 /* ----------------------------------------------------------------------------------------*/ 1708 1709 EXTERN_C_BEGIN 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1712 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1713 { 1714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1719 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 EXTERN_C_END 1723 1724 EXTERN_C_BEGIN 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1727 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1728 { 1729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1734 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 EXTERN_C_END 1738 1739 #include "petscpc.h" 1740 EXTERN_C_BEGIN 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1743 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1744 { 1745 Mat_MPIAIJ *b; 1746 PetscErrorCode ierr; 1747 PetscInt i; 1748 1749 PetscFunctionBegin; 1750 B->preallocated = PETSC_TRUE; 1751 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1752 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1753 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1754 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1755 if (d_nnz) { 1756 for (i=0; i<B->m; i++) { 1757 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1758 } 1759 } 1760 if (o_nnz) { 1761 for (i=0; i<B->m; i++) { 1762 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1763 } 1764 } 1765 b = (Mat_MPIAIJ*)B->data; 1766 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1767 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1768 1769 PetscFunctionReturn(0); 1770 } 1771 EXTERN_C_END 1772 1773 /*MC 1774 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1775 1776 Options Database Keys: 1777 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1778 1779 Level: beginner 1780 1781 .seealso: MatCreateMPIAIJ 1782 M*/ 1783 1784 EXTERN_C_BEGIN 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatCreate_MPIAIJ" 1787 PetscErrorCode MatCreate_MPIAIJ(Mat B) 1788 { 1789 Mat_MPIAIJ *b; 1790 PetscErrorCode ierr; 1791 PetscInt i; 1792 PetscMPIInt size; 1793 1794 PetscFunctionBegin; 1795 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1796 1797 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1798 B->data = (void*)b; 1799 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1800 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1801 B->factor = 0; 1802 B->assembled = PETSC_FALSE; 1803 B->mapping = 0; 1804 1805 B->insertmode = NOT_SET_VALUES; 1806 b->size = size; 1807 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1808 1809 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1810 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1811 1812 /* the information in the maps duplicates the information computed below, eventually 1813 we should remove the duplicate information that is not contained in the maps */ 1814 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1815 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1816 1817 /* build local table of row and column ownerships */ 1818 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1819 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1820 b->cowners = b->rowners + b->size + 2; 1821 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1822 b->rowners[0] = 0; 1823 for (i=2; i<=b->size; i++) { 1824 b->rowners[i] += b->rowners[i-1]; 1825 } 1826 b->rstart = b->rowners[b->rank]; 1827 b->rend = b->rowners[b->rank+1]; 1828 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1829 b->cowners[0] = 0; 1830 for (i=2; i<=b->size; i++) { 1831 b->cowners[i] += b->cowners[i-1]; 1832 } 1833 b->cstart = b->cowners[b->rank]; 1834 b->cend = b->cowners[b->rank+1]; 1835 1836 /* build cache for off array entries formed */ 1837 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1838 b->donotstash = PETSC_FALSE; 1839 b->colmap = 0; 1840 b->garray = 0; 1841 b->roworiented = PETSC_TRUE; 1842 1843 /* stuff used for matrix vector multiply */ 1844 b->lvec = PETSC_NULL; 1845 b->Mvctx = PETSC_NULL; 1846 1847 /* stuff for MatGetRow() */ 1848 b->rowindices = 0; 1849 b->rowvalues = 0; 1850 b->getrowactive = PETSC_FALSE; 1851 1852 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 1853 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1854 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1855 PetscLogObjectParent(B,b->A); 1856 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1857 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1858 PetscLogObjectParent(B,b->B); 1859 1860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1861 "MatStoreValues_MPIAIJ", 1862 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1863 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1864 "MatRetrieveValues_MPIAIJ", 1865 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1866 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1867 "MatGetDiagonalBlock_MPIAIJ", 1868 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1869 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1870 "MatIsTranspose_MPIAIJ", 1871 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1872 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1873 "MatMPIAIJSetPreallocation_MPIAIJ", 1874 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1875 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1876 "MatDiagonalScaleLocal_MPIAIJ", 1877 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 EXTERN_C_END 1881 1882 /*MC 1883 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1884 1885 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1886 and MATMPIAIJ otherwise. As a result, for single process communicators, 1887 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1888 for communicators controlling multiple processes. It is recommended that you call both of 1889 the above preallocation routines for simplicity. 1890 1891 Options Database Keys: 1892 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1893 1894 Level: beginner 1895 1896 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1897 M*/ 1898 1899 EXTERN_C_BEGIN 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatCreate_AIJ" 1902 PetscErrorCode MatCreate_AIJ(Mat A) 1903 { 1904 PetscErrorCode ierr; 1905 PetscMPIInt size; 1906 1907 PetscFunctionBegin; 1908 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1909 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1910 if (size == 1) { 1911 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1912 } else { 1913 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 EXTERN_C_END 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1921 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1922 { 1923 Mat mat; 1924 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1925 PetscErrorCode ierr; 1926 1927 PetscFunctionBegin; 1928 *newmat = 0; 1929 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1930 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1931 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1932 a = (Mat_MPIAIJ*)mat->data; 1933 1934 mat->factor = matin->factor; 1935 mat->assembled = PETSC_TRUE; 1936 mat->insertmode = NOT_SET_VALUES; 1937 mat->preallocated = PETSC_TRUE; 1938 1939 a->rstart = oldmat->rstart; 1940 a->rend = oldmat->rend; 1941 a->cstart = oldmat->cstart; 1942 a->cend = oldmat->cend; 1943 a->size = oldmat->size; 1944 a->rank = oldmat->rank; 1945 a->donotstash = oldmat->donotstash; 1946 a->roworiented = oldmat->roworiented; 1947 a->rowindices = 0; 1948 a->rowvalues = 0; 1949 a->getrowactive = PETSC_FALSE; 1950 1951 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1952 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1953 if (oldmat->colmap) { 1954 #if defined (PETSC_USE_CTABLE) 1955 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1956 #else 1957 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1958 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1959 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1960 #endif 1961 } else a->colmap = 0; 1962 if (oldmat->garray) { 1963 PetscInt len; 1964 len = oldmat->B->n; 1965 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1966 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1967 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1968 } else a->garray = 0; 1969 1970 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1971 PetscLogObjectParent(mat,a->lvec); 1972 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1973 PetscLogObjectParent(mat,a->Mvctx); 1974 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1975 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1976 PetscLogObjectParent(mat,a->A); 1977 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1978 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1979 PetscLogObjectParent(mat,a->B); 1980 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1981 *newmat = mat; 1982 PetscFunctionReturn(0); 1983 } 1984 1985 #include "petscsys.h" 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatLoad_MPIAIJ" 1989 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1990 { 
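  /*
     Overview of what follows: process 0 reads the matrix from the PETSc binary
     viewer and distributes it row-wise.  The file is expected to begin with a
     four-entry header (the MAT_FILE_COOKIE, the global row count M, the global
     column count N, and a fourth entry that must be non-negative here), followed
     by the M row lengths, then all column indices, then all numerical values;
     process 0 reads each remote process's chunk of indices and values and ships
     it over with MPI_Send().
  */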
1991 Mat A; 1992 PetscScalar *vals,*svals; 1993 MPI_Comm comm = ((PetscObject)viewer)->comm; 1994 MPI_Status status; 1995 PetscErrorCode ierr; 1996 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz,mm; 1997 PetscInt i,nz,j,rstart,rend; 1998 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1999 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 2000 PetscInt cend,cstart,n; 2001 int fd; 2002 2003 PetscFunctionBegin; 2004 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2005 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2006 if (!rank) { 2007 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2008 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2009 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2010 if (header[3] < 0) { 2011 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 2012 } 2013 } 2014 2015 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2016 M = header[1]; N = header[2]; 2017 /* determine ownership of all rows */ 2018 m = M/size + ((M % size) > rank); 2019 mm = (PetscMPIInt) m; 2020 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2021 ierr = MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2022 rowners[0] = 0; 2023 for (i=2; i<=size; i++) { 2024 rowners[i] += rowners[i-1]; 2025 } 2026 rstart = rowners[rank]; 2027 rend = rowners[rank+1]; 2028 2029 /* distribute row lengths to all processors */ 2030 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 2031 offlens = ourlens + (rend-rstart); 2032 if (!rank) { 2033 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2034 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2035 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2036 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2037 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2038 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2039 } else { 2040 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2041 } 2042 2043 if (!rank) { 2044 /* calculate the number of nonzeros on each processor */ 2045 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2046 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2047 for (i=0; i<size; i++) { 2048 for (j=rowners[i]; j< rowners[i+1]; j++) { 2049 procsnz[i] += rowlengths[j]; 2050 } 2051 } 2052 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2053 2054 /* determine max buffer needed and allocate it */ 2055 maxnz = 0; 2056 for (i=0; i<size; i++) { 2057 maxnz = PetscMax(maxnz,procsnz[i]); 2058 } 2059 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2060 2061 /* read in my part of the matrix column indices */ 2062 nz = procsnz[0]; 2063 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2064 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2065 2066 /* read in every one elses and ship off */ 2067 for (i=1; i<size; i++) { 2068 nz = procsnz[i]; 2069 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2070 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2071 } 2072 ierr = PetscFree(cols);CHKERRQ(ierr); 2073 } else { 2074 /* determine buffer space needed for message */ 2075 nz = 0; 2076 for (i=0; i<m; i++) { 2077 nz += ourlens[i]; 2078 } 2079 ierr = 
PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2080 2081 /* receive message of column indices*/ 2082 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2083 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2084 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2085 } 2086 2087 /* determine column ownership if matrix is not square */ 2088 if (N != M) { 2089 n = N/size + ((N % size) > rank); 2090 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2091 cstart = cend - n; 2092 } else { 2093 cstart = rstart; 2094 cend = rend; 2095 n = cend - cstart; 2096 } 2097 2098 /* loop over local rows, determining number of off diagonal entries */ 2099 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2100 jj = 0; 2101 for (i=0; i<m; i++) { 2102 for (j=0; j<ourlens[i]; j++) { 2103 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2104 jj++; 2105 } 2106 } 2107 2108 /* create our matrix */ 2109 for (i=0; i<m; i++) { 2110 ourlens[i] -= offlens[i]; 2111 } 2112 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2113 ierr = MatSetType(A,type);CHKERRQ(ierr); 2114 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2115 2116 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2117 for (i=0; i<m; i++) { 2118 ourlens[i] += offlens[i]; 2119 } 2120 2121 if (!rank) { 2122 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2123 2124 /* read in my part of the matrix numerical values */ 2125 nz = procsnz[0]; 2126 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2127 2128 /* insert into matrix */ 2129 jj = rstart; 2130 smycols = mycols; 2131 svals = vals; 2132 for (i=0; i<m; i++) { 2133 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2134 smycols += ourlens[i]; 2135 svals += ourlens[i]; 2136 jj++; 2137 } 2138 2139 /* read in other processors and ship out */ 2140 for (i=1; i<size; i++) { 2141 nz = procsnz[i]; 2142 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2143 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2144 } 2145 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2146 } else { 2147 /* receive numeric values */ 2148 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2149 2150 /* receive message of values*/ 2151 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2152 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2153 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2154 2155 /* insert into matrix */ 2156 jj = rstart; 2157 smycols = mycols; 2158 svals = vals; 2159 for (i=0; i<m; i++) { 2160 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2161 smycols += ourlens[i]; 2162 svals += ourlens[i]; 2163 jj++; 2164 } 2165 } 2166 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2167 ierr = PetscFree(vals);CHKERRQ(ierr); 2168 ierr = PetscFree(mycols);CHKERRQ(ierr); 2169 ierr = PetscFree(rowners);CHKERRQ(ierr); 2170 2171 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2172 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2173 *newmat = A; 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2179 /* 2180 Not great since it makes two copies of the submatrix, first an SeqAIJ 2181 in local and then by concatenating the local matrices the end result. 
2182 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2183 */ 2184 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2185 { 2186 PetscErrorCode ierr; 2187 PetscMPIInt rank,size; 2188 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2189 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2190 Mat *local,M,Mreuse; 2191 PetscScalar *vwork,*aa; 2192 MPI_Comm comm = mat->comm; 2193 Mat_SeqAIJ *aij; 2194 2195 2196 PetscFunctionBegin; 2197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2199 2200 if (call == MAT_REUSE_MATRIX) { 2201 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2202 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2203 local = &Mreuse; 2204 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2205 } else { 2206 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2207 Mreuse = *local; 2208 ierr = PetscFree(local);CHKERRQ(ierr); 2209 } 2210 2211 /* 2212 m - number of local rows 2213 n - number of columns (same on all processors) 2214 rstart - first row in new global matrix generated 2215 */ 2216 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2217 if (call == MAT_INITIAL_MATRIX) { 2218 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2219 ii = aij->i; 2220 jj = aij->j; 2221 2222 /* 2223 Determine the number of non-zeros in the diagonal and off-diagonal 2224 portions of the matrix in order to do correct preallocation 2225 */ 2226 2227 /* first get start and end of "diagonal" columns */ 2228 if (csize == PETSC_DECIDE) { 2229 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2230 if (mglobal == n) { /* square matrix */ 2231 nlocal = m; 2232 } else { 2233 nlocal = n/size + ((n % size) > rank); 2234 } 2235 } else { 2236 nlocal = csize; 2237 } 2238 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2239 rstart = rend - nlocal; 2240 if (rank == size - 1 && rend != n) { 2241 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2242 } 2243 2244 /* next, compute all the lengths */ 2245 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2246 olens = dlens + m; 2247 for (i=0; i<m; i++) { 2248 jend = ii[i+1] - ii[i]; 2249 olen = 0; 2250 dlen = 0; 2251 for (j=0; j<jend; j++) { 2252 if (*jj < rstart || *jj >= rend) olen++; 2253 else dlen++; 2254 jj++; 2255 } 2256 olens[i] = olen; 2257 dlens[i] = dlen; 2258 } 2259 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2260 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2261 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2262 ierr = PetscFree(dlens);CHKERRQ(ierr); 2263 } else { 2264 PetscInt ml,nl; 2265 2266 M = *newmat; 2267 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2268 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2269 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2270 /* 2271 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2272 rather than the slower MatSetValues(). 
2273 */ 2274 M->was_assembled = PETSC_TRUE; 2275 M->assembled = PETSC_FALSE; 2276 } 2277 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2278 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2279 ii = aij->i; 2280 jj = aij->j; 2281 aa = aij->a; 2282 for (i=0; i<m; i++) { 2283 row = rstart + i; 2284 nz = ii[i+1] - ii[i]; 2285 cwork = jj; jj += nz; 2286 vwork = aa; aa += nz; 2287 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2288 } 2289 2290 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2291 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2292 *newmat = M; 2293 2294 /* save submatrix used in processor for next request */ 2295 if (call == MAT_INITIAL_MATRIX) { 2296 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2297 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2298 } 2299 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2305 /*@C 2306 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2307 (the default parallel PETSc format). For good matrix assembly performance 2308 the user should preallocate the matrix storage by setting the parameters 2309 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2310 performance can be increased by more than a factor of 50. 2311 2312 Collective on MPI_Comm 2313 2314 Input Parameters: 2315 + A - the matrix 2316 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2317 (same value is used for all local rows) 2318 . d_nnz - array containing the number of nonzeros in the various rows of the 2319 DIAGONAL portion of the local submatrix (possibly different for each row) 2320 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2321 The size of this array is equal to the number of local rows, i.e 'm'. 2322 You must leave room for the diagonal entry even if it is zero. 2323 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2324 submatrix (same value is used for all local rows). 2325 - o_nnz - array containing the number of nonzeros in the various rows of the 2326 OFF-DIAGONAL portion of the local submatrix (possibly different for 2327 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2328 structure. The size of this array is equal to the number 2329 of local rows, i.e 'm'. 2330 2331 The AIJ format (also called the Yale sparse matrix format or 2332 compressed row storage), is fully compatible with standard Fortran 77 2333 storage. That is, the stored row and column indices can begin at 2334 either one (as in Fortran) or zero. See the users manual for details. 2335 2336 The user MUST specify either the local or global matrix dimensions 2337 (possibly both). 2338 2339 The parallel matrix is partitioned such that the first m0 rows belong to 2340 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2341 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2342 2343 The DIAGONAL portion of the local submatrix of a processor can be defined 2344 as the submatrix which is obtained by extraction the part corresponding 2345 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2346 first row that belongs to the processor, and r2 is the last row belonging 2347 to the this processor. This is a square mxm matrix. The remaining portion 2348 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence pre-allocation is perfect.
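
   A typical calling sequence on proc0 of the example above (a minimal sketch
   only; each process passes its own local sizes and nnz arrays, and error
   checking with CHKERRQ() is omitted) is:

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2};   /* per-row nonzeros in the DIAGONAL block     */
     PetscInt o_nnz[3] = {2,2,2};   /* per-row nonzeros in the OFF-DIAGONAL block */

     MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);     /* local 3x3 piece of a global 8x8 matrix */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve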
2431 2432 Level: intermediate 2433 2434 .keywords: matrix, aij, compressed row, sparse, parallel 2435 2436 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2437 @*/ 2438 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2439 { 2440 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2441 2442 PetscFunctionBegin; 2443 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2444 if (f) { 2445 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2446 } 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatCreateMPIAIJ" 2452 /*@C 2453 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2454 (the default parallel PETSc format). For good matrix assembly performance 2455 the user should preallocate the matrix storage by setting the parameters 2456 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2457 performance can be increased by more than a factor of 50. 2458 2459 Collective on MPI_Comm 2460 2461 Input Parameters: 2462 + comm - MPI communicator 2463 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2464 This value should be the same as the local size used in creating the 2465 y vector for the matrix-vector product y = Ax. 2466 . n - This value should be the same as the local size used in creating the 2467 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2468 calculated if N is given) For square matrices n is almost always m. 2469 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2470 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2471 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2472 (same value is used for all local rows) 2473 . d_nnz - array containing the number of nonzeros in the various rows of the 2474 DIAGONAL portion of the local submatrix (possibly different for each row) 2475 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2476 The size of this array is equal to the number of local rows, i.e 'm'. 2477 You must leave room for the diagonal entry even if it is zero. 2478 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2479 submatrix (same value is used for all local rows). 2480 - o_nnz - array containing the number of nonzeros in the various rows of the 2481 OFF-DIAGONAL portion of the local submatrix (possibly different for 2482 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2483 structure. The size of this array is equal to the number 2484 of local rows, i.e 'm'. 2485 2486 Output Parameter: 2487 . A - the matrix 2488 2489 Notes: 2490 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2491 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2492 storage requirements for this matrix. 2493 2494 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2495 processor than it must be used on all processors that share the object for 2496 that argument. 2497 2498 The AIJ format (also called the Yale sparse matrix format or 2499 compressed row storage), is fully compatible with standard Fortran 77 2500 storage. That is, the stored row and column indices can begin at 2501 either one (as in Fortran) or zero. See the users manual for details. 
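
   For instance, a single-call creation on proc0 of the example below (a sketch
   only; each process passes its own local sizes, and error checking is omitted)
   might look like:

.vb
     Mat A;
     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), then use A ... */
.ve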
2502 2503 The user MUST specify either the local or global matrix dimensions 2504 (possibly both). 2505 2506 The parallel matrix is partitioned such that the first m0 rows belong to 2507 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2508 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2509 2510 The DIAGONAL portion of the local submatrix of a processor can be defined 2511 as the submatrix which is obtained by extraction the part corresponding 2512 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2513 first row that belongs to the processor, and r2 is the last row belonging 2514 to the this processor. This is a square mxm matrix. The remaining portion 2515 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2516 2517 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2518 2519 When calling this routine with a single process communicator, a matrix of 2520 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2521 type of communicator, use the construction mechanism: 2522 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2523 2524 By default, this format uses inodes (identical nodes) when possible. 2525 We search for consecutive rows with the same nonzero structure, thereby 2526 reusing matrix information to achieve increased efficiency. 2527 2528 Options Database Keys: 2529 + -mat_aij_no_inode - Do not use inodes 2530 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2531 - -mat_aij_oneindex - Internally use indexing starting at 1 2532 rather than 0. Note that when calling MatSetValues(), 2533 the user still MUST index entries starting at 0! 2534 2535 2536 Example usage: 2537 2538 Consider the following 8x8 matrix with 34 non-zero values, that is 2539 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2540 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2541 as follows: 2542 2543 .vb 2544 1 2 0 | 0 3 0 | 0 4 2545 Proc0 0 5 6 | 7 0 0 | 8 0 2546 9 0 10 | 11 0 0 | 12 0 2547 ------------------------------------- 2548 13 0 14 | 15 16 17 | 0 0 2549 Proc1 0 18 0 | 19 20 21 | 0 0 2550 0 0 0 | 22 23 0 | 24 0 2551 ------------------------------------- 2552 Proc2 25 26 27 | 0 0 28 | 29 0 2553 30 0 0 | 31 32 33 | 0 34 2554 .ve 2555 2556 This can be represented as a collection of submatrices as: 2557 2558 .vb 2559 A B C 2560 D E F 2561 G H I 2562 .ve 2563 2564 Where the submatrices A,B,C are owned by proc0, D,E,F are 2565 owned by proc1, G,H,I are owned by proc2. 2566 2567 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2568 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2569 The 'M','N' parameters are 8,8, and have the same values on all procs. 2570 2571 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2572 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2573 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2574 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2575 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2576 matrix, ans [DF] as another SeqAIJ matrix. 2577 2578 When d_nz, o_nz parameters are specified, d_nz storage elements are 2579 allocated for every row of the local diagonal submatrix, and o_nz 2580 storage locations are allocated for every row of the OFF-DIAGONAL submat. 
2581 One way to choose d_nz and o_nz is to use the max nonzerors per local 2582 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2583 In this case, the values of d_nz,o_nz are: 2584 .vb 2585 proc0 : dnz = 2, o_nz = 2 2586 proc1 : dnz = 3, o_nz = 2 2587 proc2 : dnz = 1, o_nz = 4 2588 .ve 2589 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2590 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2591 for proc3. i.e we are using 12+15+10=37 storage locations to store 2592 34 values. 2593 2594 When d_nnz, o_nnz parameters are specified, the storage is specified 2595 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2596 In the above case the values for d_nnz,o_nnz are: 2597 .vb 2598 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2599 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2600 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2601 .ve 2602 Here the space allocated is sum of all the above values i.e 34, and 2603 hence pre-allocation is perfect. 2604 2605 Level: intermediate 2606 2607 .keywords: matrix, aij, compressed row, sparse, parallel 2608 2609 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2610 @*/ 2611 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2612 { 2613 PetscErrorCode ierr; 2614 PetscMPIInt size; 2615 2616 PetscFunctionBegin; 2617 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2618 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2619 if (size > 1) { 2620 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2621 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2622 } else { 2623 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2624 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2625 } 2626 PetscFunctionReturn(0); 2627 } 2628 2629 #undef __FUNCT__ 2630 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2631 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2632 { 2633 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2634 2635 PetscFunctionBegin; 2636 *Ad = a->A; 2637 *Ao = a->B; 2638 *colmap = a->garray; 2639 PetscFunctionReturn(0); 2640 } 2641 2642 #undef __FUNCT__ 2643 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2644 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2645 { 2646 PetscErrorCode ierr; 2647 PetscInt i; 2648 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2649 2650 PetscFunctionBegin; 2651 if (coloring->ctype == IS_COLORING_LOCAL) { 2652 ISColoringValue *allcolors,*colors; 2653 ISColoring ocoloring; 2654 2655 /* set coloring for diagonal portion */ 2656 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2657 2658 /* set coloring for off-diagonal portion */ 2659 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2660 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2661 for (i=0; i<a->B->n; i++) { 2662 colors[i] = allcolors[a->garray[i]]; 2663 } 2664 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2665 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2666 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2667 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2668 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2669 ISColoringValue *colors; 2670 PetscInt *larray; 2671 ISColoring ocoloring; 2672 2673 /* set coloring for diagonal portion */ 2674 ierr = 
PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2675 for (i=0; i<a->A->n; i++) { 2676 larray[i] = i + a->cstart; 2677 } 2678 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2679 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2680 for (i=0; i<a->A->n; i++) { 2681 colors[i] = coloring->colors[larray[i]]; 2682 } 2683 ierr = PetscFree(larray);CHKERRQ(ierr); 2684 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2685 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2686 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2687 2688 /* set coloring for off-diagonal portion */ 2689 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2690 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2691 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2692 for (i=0; i<a->B->n; i++) { 2693 colors[i] = coloring->colors[larray[i]]; 2694 } 2695 ierr = PetscFree(larray);CHKERRQ(ierr); 2696 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2697 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2698 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2699 } else { 2700 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2701 } 2702 2703 PetscFunctionReturn(0); 2704 } 2705 2706 #undef __FUNCT__ 2707 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2708 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2709 { 2710 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2711 PetscErrorCode ierr; 2712 2713 PetscFunctionBegin; 2714 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2715 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2716 PetscFunctionReturn(0); 2717 } 2718 2719 #undef __FUNCT__ 2720 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2721 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2722 { 2723 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2724 PetscErrorCode ierr; 2725 2726 PetscFunctionBegin; 2727 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2728 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2729 PetscFunctionReturn(0); 2730 } 2731 2732 #undef __FUNCT__ 2733 #define __FUNCT__ "MatMerge" 2734 /*@C 2735 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2736 matrices from each processor 2737 2738 Collective on MPI_Comm 2739 2740 Input Parameters: 2741 + comm - the communicators the parallel matrix will live on 2742 . inmat - the input sequential matrices 2743 . n - number of local columns (or PETSC_DECIDE) 2744 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2745 2746 Output Parameter: 2747 . outmat - the parallel matrix generated 2748 2749 Level: advanced 2750 2751 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
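
   A typical use (a sketch only, assuming each process has assembled its own
   MATSEQAIJ matrix "localmat" with the same number of columns; error checking
   is omitted) is:

.vb
     Mat localmat,globalmat;
     /* ... build and assemble localmat on PETSC_COMM_SELF ... */
     MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);
     /* localmat is destroyed inside MatMerge() */
.ve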
2752 2753 @*/ 2754 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2755 { 2756 PetscErrorCode ierr; 2757 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2758 const PetscInt *indx; 2759 const PetscScalar *values; 2760 PetscMap columnmap,rowmap; 2761 2762 PetscFunctionBegin; 2763 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2764 /* 2765 PetscMPIInt rank; 2766 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2767 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2768 */ 2769 if (scall == MAT_INITIAL_MATRIX){ 2770 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2771 if (n == PETSC_DECIDE){ 2772 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2773 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2774 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2775 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2776 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2777 } 2778 2779 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2780 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2781 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2782 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2783 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2784 2785 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2786 for (i=0;i<m;i++) { 2787 ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2788 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2789 ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2790 } 2791 /* This routine will ONLY return MPIAIJ type matrix */ 2792 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2793 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2794 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2795 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2796 2797 } else if (scall == MAT_REUSE_MATRIX){ 2798 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2799 } else { 2800 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2801 } 2802 2803 for (i=0;i<m;i++) { 2804 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2805 I = i + rstart; 2806 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2807 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2808 } 2809 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2810 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 2813 PetscFunctionReturn(0); 2814 } 2815 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "MatFileSplit" 2818 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2819 { 2820 PetscErrorCode ierr; 2821 PetscMPIInt rank; 2822 PetscInt m,N,i,rstart,nnz; 2823 size_t len; 2824 const PetscInt *indx; 2825 PetscViewer out; 2826 char *name; 2827 Mat B; 2828 const PetscScalar *values; 2829 2830 PetscFunctionBegin; 2831 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2832 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2833 /* Should this be the type of the diagonal block of A? 
  */
  ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
    if (merge->ci){ierr = PetscFree(merge->ci);CHKERRQ(ierr);}
    if (merge->cj){ierr = PetscFree(merge->cj);CHKERRQ(ierr);}
    if (merge->C_seq){ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr);}

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free merge only when it was actually retrieved from the container above */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "src/mat/utils/freespace.h"
#include "petscbt.h"
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is kept inside the container "Mat_Merge_SeqsToMPI" and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
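
   A typical calling sequence (a sketch only; "seqA" is a hypothetical sequential
   matrix assembled with identical dimensions on every process, and error checking
   is omitted) is:

.vb
     Mat seqA,mpiA;
     /* ... build and assemble seqA as a MATSEQAIJ on every process ... */
     MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);
     /* seqA will be destroyed automatically when mpiA is destroyed (see Notes above) */
.ve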
2918 @*/ 2919 static PetscEvent logkey_seqstompinum = 0; 2920 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2921 { 2922 PetscErrorCode ierr; 2923 MPI_Comm comm=mpimat->comm; 2924 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2925 PetscMPIInt size,rank,taga,*len_s; 2926 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2927 PetscInt proc,m; 2928 PetscInt **buf_ri,**buf_rj; 2929 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2930 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2931 MPI_Request *s_waits,*r_waits; 2932 MPI_Status *status; 2933 MatScalar *aa=a->a,**abuf_r,*ba_i; 2934 Mat_Merge_SeqsToMPI *merge; 2935 PetscObjectContainer container; 2936 2937 PetscFunctionBegin; 2938 if (!logkey_seqstompinum) { 2939 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2940 } 2941 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2942 2943 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2944 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2945 2946 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2947 if (container) { 2948 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2949 } 2950 bi = merge->bi; 2951 bj = merge->bj; 2952 buf_ri = merge->buf_ri; 2953 buf_rj = merge->buf_rj; 2954 2955 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2956 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2957 len_s = merge->len_s; 2958 2959 /* send and recv matrix values */ 2960 /*-----------------------------*/ 2961 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2962 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2963 2964 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2965 for (proc=0,k=0; proc<size; proc++){ 2966 if (!len_s[proc]) continue; 2967 i = owners[proc]; 2968 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2969 k++; 2970 } 2971 2972 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2973 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2974 ierr = PetscFree(status);CHKERRQ(ierr); 2975 2976 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2977 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2978 2979 /* insert mat values of mpimat */ 2980 /*----------------------------*/ 2981 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2982 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2983 nextrow = buf_ri_k + merge->nrecv; 2984 nextai = nextrow + merge->nrecv; 2985 2986 for (k=0; k<merge->nrecv; k++){ 2987 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2988 nrows = *(buf_ri_k[k]); 2989 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2990 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2991 } 2992 2993 /* set values of ba */ 2994 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2995 for (i=0; i<m; i++) { 2996 arow = owners[rank] + i; 2997 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2998 bnzi = bi[i+1] - bi[i]; 2999 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3000 3001 /* add local non-zero vals of this proc's seqmat into ba */ 3002 anzi = ai[arow+1] - ai[arow]; 3003 aj = a->j + ai[arow]; 3004 aa = a->a + ai[arow]; 3005 nextaj = 0; 3006 for 
(j=0; nextaj<anzi; j++){ 3007 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3008 ba_i[j] += aa[nextaj++]; 3009 } 3010 } 3011 3012 /* add received vals into ba */ 3013 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3014 /* i-th row */ 3015 if (i == *nextrow[k]) { 3016 anzi = *(nextai[k]+1) - *nextai[k]; 3017 aj = buf_rj[k] + *(nextai[k]); 3018 aa = abuf_r[k] + *(nextai[k]); 3019 nextaj = 0; 3020 for (j=0; nextaj<anzi; j++){ 3021 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3022 ba_i[j] += aa[nextaj++]; 3023 } 3024 } 3025 nextrow[k]++; nextai[k]++; 3026 } 3027 } 3028 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3029 } 3030 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3031 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 3033 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3034 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3035 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3036 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3037 PetscFunctionReturn(0); 3038 } 3039 static PetscEvent logkey_seqstompisym = 0; 3040 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3041 { 3042 PetscErrorCode ierr; 3043 Mat B_mpi; 3044 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3045 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3046 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3047 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3048 PetscInt len,proc,*dnz,*onz; 3049 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3050 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3051 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3052 MPI_Status *status; 3053 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3054 PetscBT lnkbt; 3055 Mat_Merge_SeqsToMPI *merge; 3056 PetscObjectContainer container; 3057 3058 PetscFunctionBegin; 3059 if (!logkey_seqstompisym) { 3060 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3061 } 3062 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3063 3064 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3065 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3066 3067 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3068 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3069 3070 /* determine row ownership */ 3071 /*---------------------------------------------------------*/ 3072 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3073 if (m == PETSC_DECIDE) { 3074 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3075 } else { 3076 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3077 } 3078 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3079 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3080 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3081 3082 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3083 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3084 3085 /* determine the number of messages to send, their lengths */ 3086 /*---------------------------------------------------------*/ 3087 len_s = merge->len_s; 3088 3089 len = 0; /* length of buf_si[] */ 3090 merge->nsend = 0; 3091 for (proc=0; proc<size; proc++){ 3092 len_si[proc] = 0; 3093 if (proc == rank){ 3094 len_s[proc] = 0; 3095 } else { 3096 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3097 
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*--------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (local to the receiving process)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  if (n == PETSC_DECIDE) {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->C_seq        = seqmat;
  merge->ci           = PETSC_NULL;
  merge->cj           = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
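
/*
   Added note (not part of the original source): the Mat_Merge_SeqsToMPI structure attached
   above under the key "MatMergeSeqsToMPI" carries everything the numeric phase needs: the
   merged row pointers bi/bj, the received i/j-structures buf_ri/buf_rj, the row map and
   the send lengths len_s.  MatMerge_SeqsToMPINumeric() retrieves it with PetscObjectQuery()
   and PetscObjectContainerGetPointer(), so the structure exchange is done only once even
   when the numeric phase is repeated.
*/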

static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
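
/*
   Added usage sketch (not part of the original source; variable names are illustrative,
   and C_seq is assumed to be a fully assembled MATSEQAIJ matrix on each process):

      Mat C_seq,C_mpi;
      ierr = MatMerge_SeqsToMPI(comm,C_seq,m,n,MAT_INITIAL_MATRIX,&C_mpi);CHKERRQ(ierr);
      ...
      (change the numerical values, but not the nonzero pattern, of C_seq)
      ierr = MatMerge_SeqsToMPI(comm,C_seq,m,n,MAT_REUSE_MATRIX,&C_mpi);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numeric phase is repeated; the symbolic data attached to
   C_mpi by the first call is reused.
*/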
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
   MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a;
  PetscInt       am=A->m,i,j,*nnz,nnz_max,ncols,*cols,cstart=mpimat->cstart;
  Mat            Aw;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
    nnz_max = 0;
    for (i=0; i<am; i++){
      nnz[i] = (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
      if (nnz[i] > nnz_max) nnz_max = nnz[i];
    }
    ierr = MatCreate(PETSC_COMM_SELF,am,A->N,am,A->N,&Aw);CHKERRQ(ierr);
    ierr = MatSetType(Aw,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(Aw,0,nnz);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    Aw  = *A_loc;
    mat = (Mat_SeqAIJ*)Aw->data;
    nnz_max = 0;
    for (i=0; i<am; i++){
      j = mat->i[i+1] - mat->i[i];
      if (j > nnz_max) nnz_max = j;
    }
    /* clear old values in *A_loc */
    ierr = PetscMemzero(mat->a,mat->i[am]*sizeof(MatScalar));CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }
  ierr = PetscMalloc((1+nnz_max)*sizeof(PetscInt),&cols);CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* diagonal portion of A */
    ncols = ai[i+1] - ai[i];
    for (j=0; j<ncols; j++) {cols[j] = cstart + *aj++;}
    ierr = MatSetValues(Aw,1,&i,ncols,cols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
    /* off-diagonal portion of A */
    ncols = bi[i+1] - bi[i];
    for (j=0; j<ncols; j++) cols[j] = cmap[*bj++]; /* cmap[*(bj+bi[i]+j)]; */
    ierr = MatSetValues(Aw,1,&i,ncols,cols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(cols);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscFree(nnz);CHKERRQ(ierr);
    *A_loc = Aw;
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
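
/*
   Added usage sketch (not part of the original source):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ...
      (A changes numerically but keeps its nonzero pattern)
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);

   A_loc has A->m rows (the local rows of A) and A->N columns (the full global column
   space), so its column indices are global column indices of A.
*/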
static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
   MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of A, keeping only the nonzero columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = a->rstart; end = a->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* off-diagonal columns that lie before the diagonal block */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* columns of the diagonal block */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* off-diagonal columns that lie after the diagonal block */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
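
/*
   Added usage sketch (not part of the original source): passing PETSC_NULL for row and col
   selects the local rows of A and a condensed column set, namely every column of A's
   diagonal block plus the off-diagonal columns that contain nonzeros (a->garray):

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
*/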
static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B to extract
.  brstart - row index of B_seq from which the next B->m rows are taken from B's local rows
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
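
/*
   Added usage sketch (not part of the original source; variable names are illustrative):

      IS       rowb = PETSC_NULL,colb = PETSC_NULL;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ...
      (B changes numerically but keeps its nonzero pattern)
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);

   The index sets returned through rowb/colb must be kept by the caller and passed back in
   for the MAT_REUSE_MATRIX call.
*/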
static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
   MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
   of the OFF-DIAGONAL portion of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B to extract
.  brstart - row index of B_seq from which the remaining rows correspond to columns of A greater than or equal to A's cend
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzB,*cmap;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrowsOfAoCols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzB   = a->B->n;
    if (!nzB){ /* the off-diagonal part of A has no columns; output a null matrix */
      *B_seq = PETSC_NULL;
      ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr  = PetscMalloc(nzB*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    *brstart = i;
    for (i=*brstart; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);

    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
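
/*
   Added note (not part of the original source): MatGetBrowsOfAoCols() behaves like
   MatGetBrowsOfAcols() above, but extracts only the rows of B matching the off-diagonal
   columns of the local part of A (a->garray).  When that off-diagonal part has no columns
   the routine returns early with *B_seq set to PETSC_NULL, so callers should be prepared
   for a null matrix:

      IS       rowb = PETSC_NULL,colb = PETSC_NULL;
      PetscInt brstart;
      Mat      Bo_seq;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bo_seq);CHKERRQ(ierr);
      if (Bo_seq) {
        ...
      }
*/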