1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (PetscInt*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(PetscInt));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (PetscInt*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(PetscInt));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 PetscInt i,j,rstart = aij->rstart,rend = 
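/*
      MatSetValues_MPIAIJ() routes every incoming entry (im[i],in[j]) to one of three
      destinations: the diagonal block aij->A when both the row and the column are owned
      locally, the off-diagonal block aij->B (with the column translated to the compressed
      local numbering via colmap/garray) when only the row is local, and the stash when the
      row belongs to another process.  A rough sketch of that decision; handle_A, handle_B
      and stash_entry are placeholder names, not PETSc routines:

         if (row >= rstart && row < rend) {
           if (col >= cstart && col < cend) handle_A(row-rstart,col-cstart,value);
           else                             handle_B(row-rstart,global_to_local_B(col),value);
         } else {
           stash_entry(row,col,value);      // shipped to the owning process at assembly time
         }

      The actual insertions go through the MatSetValues_SeqAIJ_A/B_Private macros above,
      which binary-search the sorted row and, when the row is full and new nonzeros are
      allowed, grow the storage by CHUNKSIZE entries at a time.
*/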
aij->rend; 192 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 PetscInt nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 278 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 PetscInt nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 PetscMPIInt n; 351 PetscInt i,j,rstart,ncols,flg; 352 PetscInt *row,*col,other_disassembled; 353 PetscScalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for (i=0; i<n;) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. 
*/ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 /* reaccess the b because aij->B was changed */ 389 b = (Mat_SeqAIJ *)aij->B->data; 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 404 /* used by MatAXPY() */ 405 a->xtoy = 0; b->xtoy = 0; 406 a->XtoY = 0; b->XtoY = 0; 407 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 414 { 415 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 416 PetscErrorCode ierr; 417 418 PetscFunctionBegin; 419 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 420 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatZeroRows_MPIAIJ" 426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 427 { 428 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 429 PetscErrorCode ierr; 430 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 431 PetscInt i,N,*rows,*owners = l->rowners; 432 PetscInt *nprocs,j,idx,nsends,row; 433 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 434 PetscInt *rvalues,count,base,slen,*source; 435 PetscInt *lens,*lrows,*values,rstart=l->rstart; 436 MPI_Comm comm = A->comm; 437 MPI_Request *send_waits,*recv_waits; 438 MPI_Status recv_status,*send_status; 439 IS istmp; 440 PetscTruth found; 441 442 PetscFunctionBegin; 443 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 445 446 /* first count number of contributors to each processor */ 447 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 448 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 450 for (i=0; i<N; i++) { 451 idx = rows[i]; 452 found = PETSC_FALSE; 453 for (j=0; j<size; j++) { 454 if (idx >= owners[j] && idx < owners[j+1]) { 455 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 456 } 457 } 458 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 459 } 460 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 461 462 /* inform other processors of number of messages and max length*/ 463 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 464 465 /* post receives: */ 466 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 467 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 468 for (i=0; i<nrecvs; i++) { 469 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 470 } 471 472 /* do sends: 473 1) starts[i] gives the starting index in svalues for stuff going to 474 the ith processor 475 */ 476 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 477 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 478 
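/*
      The exchange above is the standard PETSc rendezvous pattern: each process determines
      the owner of every row it wants zeroed from the ownership ranges in owners[], counts
      how many indices go to each owner, and PetscMaxSum() then tells every process how many
      messages it will receive and how long the longest one can be, so the receives are
      posted before any send starts.  A minimal sketch of the owner lookup used in the
      counting loop (linear scan over the ranges):

         PetscInt owner_of(PetscInt idx,PetscMPIInt size,const PetscInt owners[])
         {
           PetscInt j;
           for (j=0; j<size; j++) {
             if (idx >= owners[j] && idx < owners[j+1]) return j;
           }
           return -1;             // caller reports "Index out of range"
         }
*/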
ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, allocating
     new memory, thus calling lots of mallocs and slowing things down.
536 537 Contributed by: Mathew Knepley 538 */ 539 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 540 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 541 if (diag && (l->A->M == l->A->N)) { 542 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 543 } else if (diag) { 544 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 545 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 546 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 548 } 549 for (i = 0; i < slen; i++) { 550 row = lrows[i] + rstart; 551 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 } else { 556 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 557 } 558 ierr = ISDestroy(istmp);CHKERRQ(ierr); 559 ierr = PetscFree(lrows);CHKERRQ(ierr); 560 561 /* wait on sends */ 562 if (nsends) { 563 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 564 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 565 ierr = PetscFree(send_status);CHKERRQ(ierr); 566 } 567 ierr = PetscFree(send_waits);CHKERRQ(ierr); 568 ierr = PetscFree(svalues);CHKERRQ(ierr); 569 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatMult_MPIAIJ" 575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 576 { 577 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 578 PetscErrorCode ierr; 579 PetscInt nt; 580 581 PetscFunctionBegin; 582 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 583 if (nt != A->n) { 584 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 585 } 586 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 588 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 589 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatMultAdd_MPIAIJ" 595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 598 PetscErrorCode ierr; 599 600 PetscFunctionBegin; 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 /* do nondiagonal part */ 617 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 618 /* send it on its way */ 619 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 620 /* do local part */ 621 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 622 /* receive remote parts: note this assumes the values are not actually */ 623 /* inserted in yy until the next line, which is true for my implementation*/ 624 /* but is not perhaps always true. 
*/ 625 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 EXTERN_C_BEGIN 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 633 { 634 MPI_Comm comm; 635 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 636 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 637 IS Me,Notme; 638 PetscErrorCode ierr; 639 PetscInt M,N,first,last,*notme,i; 640 PetscMPIInt size; 641 642 PetscFunctionBegin; 643 644 /* Easy test: symmetric diagonal block */ 645 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 646 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 647 if (!*f) PetscFunctionReturn(0); 648 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 649 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 650 if (size == 1) PetscFunctionReturn(0); 651 652 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 653 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 654 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 655 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 656 for (i=0; i<first; i++) notme[i] = i; 657 for (i=last; i<M; i++) notme[i-last+first] = i; 658 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 659 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 660 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 661 Aoff = Aoffs[0]; 662 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 663 Boff = Boffs[0]; 664 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 665 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 666 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 667 ierr = ISDestroy(Me);CHKERRQ(ierr); 668 ierr = ISDestroy(Notme);CHKERRQ(ierr); 669 670 PetscFunctionReturn(0); 671 } 672 EXTERN_C_END 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 679 PetscErrorCode ierr; 680 681 PetscFunctionBegin; 682 /* do nondiagonal part */ 683 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 684 /* send it on its way */ 685 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 686 /* do local part */ 687 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 688 /* receive remote parts: note this assumes the values are not actually */ 689 /* inserted in yy until the next line, which is true for my implementation*/ 690 /* but is not perhaps always true. 
*/ 691 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 /* 696 This only works correctly for square matrices where the subblock A->A is the 697 diagonal block 698 */ 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 702 { 703 PetscErrorCode ierr; 704 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 705 706 PetscFunctionBegin; 707 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 708 if (a->rstart != a->cstart || a->rend != a->cend) { 709 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 710 } 711 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatScale_MPIAIJ" 717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 720 PetscErrorCode ierr; 721 722 PetscFunctionBegin; 723 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 724 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatDestroy_MPIAIJ" 730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 731 { 732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 #if defined(PETSC_USE_LOG) 737 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 738 #endif 739 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 740 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 741 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 742 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 743 #if defined (PETSC_USE_CTABLE) 744 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 745 #else 746 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 747 #endif 748 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 749 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 750 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 751 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 752 ierr = PetscFree(aij);CHKERRQ(ierr); 753 754 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 755 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 756 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 757 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatView_MPIAIJ_Binary" 765 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 769 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 770 PetscErrorCode ierr; 771 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 772 int fd; 773 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 774 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 775 PetscScalar *column_values; 776 777 PetscFunctionBegin; 778 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 779 ierr = 
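/*
      The multiply kernels above all use the same splitting y = A_d*x_local + B_o*x_ghost,
      where A_d is the diagonal block (a->A), B_o the off-diagonal block (a->B) and x_ghost
      the copy of the needed off-process entries of x gathered into a->lvec.  The scatter is
      started before the local product so communication overlaps computation; the transpose
      variants run the same scatter in reverse with ADD_VALUES.  In outline (not literal
      code from this file):

         VecScatterBegin(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);
         MatMult(Ad,x,y);                      // local block while ghost values are in flight
         VecScatterEnd(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);
         MatMultAdd(Bo,lvec,y,y);              // add the off-diagonal contribution
*/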
MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 780 nz = A->nz + B->nz; 781 if (!rank) { 782 header[0] = MAT_FILE_COOKIE; 783 header[1] = mat->M; 784 header[2] = mat->N; 785 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 786 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 787 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 788 /* get largest number of rows any processor has */ 789 rlen = mat->m; 790 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 791 for (i=1; i<size; i++) { 792 rlen = PetscMax(rlen,range[i+1] - range[i]); 793 } 794 } else { 795 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 796 rlen = mat->m; 797 } 798 799 /* load up the local row counts */ 800 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 801 for (i=0; i<mat->m; i++) { 802 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 803 } 804 805 /* store the row lengths to the file */ 806 if (!rank) { 807 MPI_Status status; 808 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 809 for (i=1; i<size; i++) { 810 rlen = range[i+1] - range[i]; 811 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 812 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 813 } 814 } else { 815 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 816 } 817 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 818 819 /* load up the local column indices */ 820 nzmax = nz; /* )th processor needs space a largest processor needs */ 821 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 822 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 823 cnt = 0; 824 for (i=0; i<mat->m; i++) { 825 for (j=B->i[i]; j<B->i[i+1]; j++) { 826 if ( (col = garray[B->j[j]]) > cstart) break; 827 column_indices[cnt++] = col; 828 } 829 for (k=A->i[i]; k<A->i[i+1]; k++) { 830 column_indices[cnt++] = A->j[k] + cstart; 831 } 832 for (; j<B->i[i+1]; j++) { 833 column_indices[cnt++] = garray[B->j[j]]; 834 } 835 } 836 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 837 838 /* store the column indices to the file */ 839 if (!rank) { 840 MPI_Status status; 841 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 842 for (i=1; i<size; i++) { 843 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 844 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 845 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 846 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 847 } 848 } else { 849 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 850 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 851 } 852 ierr = PetscFree(column_indices);CHKERRQ(ierr); 853 854 /* load up the local column values */ 855 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 856 cnt = 0; 857 for (i=0; i<mat->m; i++) { 858 for (j=B->i[i]; j<B->i[i+1]; j++) { 859 if ( garray[B->j[j]] > cstart) break; 860 column_values[cnt++] = B->a[j]; 861 } 862 for (k=A->i[i]; k<A->i[i+1]; k++) { 863 column_values[cnt++] = A->a[k]; 864 } 865 for (; j<B->i[i+1]; j++) { 866 column_values[cnt++] = B->a[j]; 867 } 868 } 869 if (cnt != A->nz + B->nz) 
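/*
      The loop above produces, for every local row, column indices that increase globally by
      splicing three pieces: the off-diagonal (B) entries whose global column lies below the
      owned column block, then the diagonal-block (A) entries shifted by cstart, then the
      remaining B entries.  garray[] supplies the global column of each compressed B column.
      An equivalent restatement of one row of that splice (out and cnt are illustrative names):

         for (j=B->i[i]; j<B->i[i+1] && garray[B->j[j]] < cstart; j++) out[cnt++] = garray[B->j[j]];
         for (k=A->i[i]; k<A->i[i+1]; k++)                            out[cnt++] = A->j[k] + cstart;
         for (          ; j<B->i[i+1]; j++)                           out[cnt++] = garray[B->j[j]];

      which is why cnt must end up exactly A->nz + B->nz.
*/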
SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 870 871 /* store the column values to the file */ 872 if (!rank) { 873 MPI_Status status; 874 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 875 for (i=1; i<size; i++) { 876 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 877 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 878 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 879 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 880 } 881 } else { 882 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 883 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 884 } 885 ierr = PetscFree(column_values);CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 891 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 892 { 893 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 894 PetscErrorCode ierr; 895 PetscMPIInt rank = aij->rank,size = aij->size; 896 PetscTruth isdraw,iascii,flg,isbinary; 897 PetscViewer sviewer; 898 PetscViewerFormat format; 899 900 PetscFunctionBegin; 901 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 902 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 903 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 904 if (iascii) { 905 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 906 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 907 MatInfo info; 908 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 909 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 910 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 911 if (flg) { 912 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 913 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 914 } else { 915 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 916 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 917 } 918 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 919 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 920 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 921 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 922 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 923 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } else if (format == PETSC_VIEWER_ASCII_INFO) { 926 PetscFunctionReturn(0); 927 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 928 PetscFunctionReturn(0); 929 } 930 } else if (isbinary) { 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 936 } 937 PetscFunctionReturn(0); 938 } else if (isdraw) { 939 PetscDraw draw; 940 PetscTruth isnull; 941 ierr = 
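/*
      Taken together, the binary viewer above writes a single stream on process 0 with the
      layout (integers as PETSC_INT, values as PETSC_SCALAR):

         header[4]         = { MAT_FILE_COOKIE, M, N, total number of nonzeros }
         row_lengths[M]    = number of nonzeros in each global row
         column_indices[]  = global column of every nonzero, row by row, ascending within a row
         values[]          = the matching numerical values, in the same order

      Processes other than 0 send their row lengths, indices and values to process 0, which
      appends them in ownership order.
*/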
PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 942 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 943 } 944 945 if (size == 1) { 946 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 947 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 948 } else { 949 /* assemble the entire matrix onto first processor. */ 950 Mat A; 951 Mat_SeqAIJ *Aloc; 952 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 953 PetscScalar *a; 954 955 if (!rank) { 956 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 957 } else { 958 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 959 } 960 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 961 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 962 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 963 PetscLogObjectParent(mat,A); 964 965 /* copy over the A part */ 966 Aloc = (Mat_SeqAIJ*)aij->A->data; 967 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = aij->rstart; 969 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 973 } 974 aj = Aloc->j; 975 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 976 977 /* copy over the B part */ 978 Aloc = (Mat_SeqAIJ*)aij->B->data; 979 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 980 row = aij->rstart; 981 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 982 ct = cols; 983 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 984 for (i=0; i<m; i++) { 985 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 986 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 987 } 988 ierr = PetscFree(ct);CHKERRQ(ierr); 989 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 990 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 /* 992 Everyone has to call to draw the matrix since the graphics waits are 993 synchronized across all processors that share the PetscDraw object 994 */ 995 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 996 if (!rank) { 997 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 998 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 999 } 1000 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1001 ierr = MatDestroy(A);CHKERRQ(ierr); 1002 } 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatView_MPIAIJ" 1008 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1009 { 1010 PetscErrorCode ierr; 1011 PetscTruth iascii,isdraw,issocket,isbinary; 1012 1013 PetscFunctionBegin; 1014 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1015 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1016 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1017 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1018 if (iascii || isdraw || isbinary || issocket) { 1019 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1020 } else { 1021 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1022 } 1023 PetscFunctionReturn(0); 1024 } 1025 1026 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "MatRelax_MPIAIJ" 1030 
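/*
      MatRelax_MPIAIJ() below implements only processor-local relaxation: for the SOR_LOCAL_*
      flags each outer iteration refreshes the ghost values of x, builds a right-hand side
      that accounts for the off-diagonal coupling, and then runs the sequential SOR kernel on
      the diagonal block.  Per iteration, roughly:

         x_ghost <- forward scatter of x into mat->lvec
         bb1     <- bb - B_o*x_ghost         (coded as lvec scaled by -1, then multadd)
         x       <- sequential SOR sweep of A_d with right-hand side bb1

      A true global (parallel) SOR is not supported and raises an error.
*/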
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1031 { 1032 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1033 PetscErrorCode ierr; 1034 Vec bb1; 1035 PetscScalar mone=-1.0; 1036 1037 PetscFunctionBegin; 1038 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1039 1040 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1041 1042 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1043 if (flag & SOR_ZERO_INITIAL_GUESS) { 1044 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1045 its--; 1046 } 1047 1048 while (its--) { 1049 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1050 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1051 1052 /* update rhs: bb1 = bb - B*x */ 1053 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1054 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1055 1056 /* local sweep */ 1057 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1058 CHKERRQ(ierr); 1059 } 1060 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1061 if (flag & SOR_ZERO_INITIAL_GUESS) { 1062 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1063 its--; 1064 } 1065 while (its--) { 1066 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1067 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 1069 /* update rhs: bb1 = bb - B*x */ 1070 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1071 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1072 1073 /* local sweep */ 1074 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1075 CHKERRQ(ierr); 1076 } 1077 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1078 if (flag & SOR_ZERO_INITIAL_GUESS) { 1079 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1080 its--; 1081 } 1082 while (its--) { 1083 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1084 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1085 1086 /* update rhs: bb1 = bb - B*x */ 1087 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1088 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1089 1090 /* local sweep */ 1091 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1092 CHKERRQ(ierr); 1093 } 1094 } else { 1095 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1096 } 1097 1098 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1104 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1105 { 1106 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1107 Mat A = mat->A,B = mat->B; 1108 PetscErrorCode ierr; 1109 PetscReal isend[5],irecv[5]; 1110 1111 PetscFunctionBegin; 1112 info->block_size = 1.0; 1113 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1114 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1115 isend[3] = info->memory; isend[4] = info->mallocs; 1116 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1117 isend[0] += 
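/*
      MatGetInfo_MPIAIJ() gathers five statistics from both local blocks and then, depending
      on the flag, returns them as-is (MAT_LOCAL) or reduces them over the communicator with
      MPI_MAX (MAT_GLOBAL_MAX) or MPI_SUM (MAT_GLOBAL_SUM).  A caller-side sketch:

         MatInfo info;
         ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
         ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, mallocs during assembly %g\n",
                            info.nz_used,info.mallocs);CHKERRQ(ierr);
*/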
info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1118 isend[3] += info->memory; isend[4] += info->mallocs; 1119 if (flag == MAT_LOCAL) { 1120 info->nz_used = isend[0]; 1121 info->nz_allocated = isend[1]; 1122 info->nz_unneeded = isend[2]; 1123 info->memory = isend[3]; 1124 info->mallocs = isend[4]; 1125 } else if (flag == MAT_GLOBAL_MAX) { 1126 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1127 info->nz_used = irecv[0]; 1128 info->nz_allocated = irecv[1]; 1129 info->nz_unneeded = irecv[2]; 1130 info->memory = irecv[3]; 1131 info->mallocs = irecv[4]; 1132 } else if (flag == MAT_GLOBAL_SUM) { 1133 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1134 info->nz_used = irecv[0]; 1135 info->nz_allocated = irecv[1]; 1136 info->nz_unneeded = irecv[2]; 1137 info->memory = irecv[3]; 1138 info->mallocs = irecv[4]; 1139 } 1140 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1141 info->fill_ratio_needed = 0; 1142 info->factor_mallocs = 0; 1143 info->rows_global = (double)matin->M; 1144 info->columns_global = (double)matin->N; 1145 info->rows_local = (double)matin->m; 1146 info->columns_local = (double)matin->N; 1147 1148 PetscFunctionReturn(0); 1149 } 1150 1151 #undef __FUNCT__ 1152 #define __FUNCT__ "MatSetOption_MPIAIJ" 1153 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1154 { 1155 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1156 PetscErrorCode ierr; 1157 1158 PetscFunctionBegin; 1159 switch (op) { 1160 case MAT_NO_NEW_NONZERO_LOCATIONS: 1161 case MAT_YES_NEW_NONZERO_LOCATIONS: 1162 case MAT_COLUMNS_UNSORTED: 1163 case MAT_COLUMNS_SORTED: 1164 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1165 case MAT_KEEP_ZEROED_ROWS: 1166 case MAT_NEW_NONZERO_LOCATION_ERR: 1167 case MAT_USE_INODES: 1168 case MAT_DO_NOT_USE_INODES: 1169 case MAT_IGNORE_ZERO_ENTRIES: 1170 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1171 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1172 break; 1173 case MAT_ROW_ORIENTED: 1174 a->roworiented = PETSC_TRUE; 1175 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1176 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1177 break; 1178 case MAT_ROWS_SORTED: 1179 case MAT_ROWS_UNSORTED: 1180 case MAT_YES_NEW_DIAGONALS: 1181 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1182 break; 1183 case MAT_COLUMN_ORIENTED: 1184 a->roworiented = PETSC_FALSE; 1185 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1186 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1187 break; 1188 case MAT_IGNORE_OFF_PROC_ENTRIES: 1189 a->donotstash = PETSC_TRUE; 1190 break; 1191 case MAT_NO_NEW_DIAGONALS: 1192 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1193 case MAT_SYMMETRIC: 1194 case MAT_STRUCTURALLY_SYMMETRIC: 1195 case MAT_HERMITIAN: 1196 case MAT_SYMMETRY_ETERNAL: 1197 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1198 break; 1199 case MAT_NOT_SYMMETRIC: 1200 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1201 case MAT_NOT_HERMITIAN: 1202 case MAT_NOT_SYMMETRY_ETERNAL: 1203 break; 1204 default: 1205 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatGetRow_MPIAIJ" 1212 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1213 { 1214 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1215 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1216 PetscErrorCode ierr; 1217 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1218 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1219 PetscInt 
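/*
      MatGetRow on this format returns one scratch copy of the requested local row with
      global column numbers in increasing order: first the off-diagonal entries whose
      columns lie below the owned block, then the diagonal-block entries (offset by cstart),
      then the remaining off-diagonal entries.  Only locally owned rows may be requested,
      one at a time.  Typical caller pattern, sketched:

         PetscInt    row,ncols,*cols;
         PetscScalar *vals;
         for (row=rstart; row<rend; row++) {
           ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
           ... use ncols, cols and vals here ...
           ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         }
*/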
*cmap,*idx_p; 1220 1221 PetscFunctionBegin; 1222 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1223 mat->getrowactive = PETSC_TRUE; 1224 1225 if (!mat->rowvalues && (idx || v)) { 1226 /* 1227 allocate enough space to hold information from the longest row. 1228 */ 1229 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1230 PetscInt max = 1,tmp; 1231 for (i=0; i<matin->m; i++) { 1232 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1233 if (max < tmp) { max = tmp; } 1234 } 1235 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1236 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1237 } 1238 1239 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1240 lrow = row - rstart; 1241 1242 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1243 if (!v) {pvA = 0; pvB = 0;} 1244 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1245 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1246 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1247 nztot = nzA + nzB; 1248 1249 cmap = mat->garray; 1250 if (v || idx) { 1251 if (nztot) { 1252 /* Sort by increasing column numbers, assuming A and B already sorted */ 1253 PetscInt imark = -1; 1254 if (v) { 1255 *v = v_p = mat->rowvalues; 1256 for (i=0; i<nzB; i++) { 1257 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1258 else break; 1259 } 1260 imark = i; 1261 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1262 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1263 } 1264 if (idx) { 1265 *idx = idx_p = mat->rowindices; 1266 if (imark > -1) { 1267 for (i=0; i<imark; i++) { 1268 idx_p[i] = cmap[cworkB[i]]; 1269 } 1270 } else { 1271 for (i=0; i<nzB; i++) { 1272 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1273 else break; 1274 } 1275 imark = i; 1276 } 1277 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1278 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1279 } 1280 } else { 1281 if (idx) *idx = 0; 1282 if (v) *v = 0; 1283 } 1284 } 1285 *nz = nztot; 1286 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1287 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1293 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1294 { 1295 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1296 1297 PetscFunctionBegin; 1298 if (aij->getrowactive == PETSC_FALSE) { 1299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1300 } 1301 aij->getrowactive = PETSC_FALSE; 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatNorm_MPIAIJ" 1307 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1308 { 1309 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1310 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1311 PetscErrorCode ierr; 1312 PetscInt i,j,cstart = aij->cstart; 1313 PetscReal sum = 0.0; 1314 PetscScalar *v; 1315 1316 PetscFunctionBegin; 1317 if (aij->size == 1) { 1318 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1319 } else { 1320 if (type == NORM_FROBENIUS) { 1321 v = amat->a; 1322 for (i=0; i<amat->nz; i++) { 1323 #if defined(PETSC_USE_COMPLEX) 1324 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1325 #else 1326 sum += (*v)*(*v); v++; 1327 #endif 1328 } 1329 v = bmat->a; 
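/*
      Each norm combines local contributions from both blocks with one reduction: the
      Frobenius norm sums squared magnitudes and takes the square root of the MPI_SUM; the
      1-norm accumulates per-column absolute sums (off-diagonal columns mapped through
      garray), MPI_SUMs the length-N array and takes its maximum; the infinity norm takes
      per-row absolute sums and an MPI_MAX.  For the Frobenius case the pattern is simply:

         local  = sum over local nonzeros of |a_ij|^2
         MPI_Allreduce(&local,&global,1,MPIU_REAL,MPI_SUM,comm);
         norm   = sqrt(global);
*/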
1330 for (i=0; i<bmat->nz; i++) { 1331 #if defined(PETSC_USE_COMPLEX) 1332 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1333 #else 1334 sum += (*v)*(*v); v++; 1335 #endif 1336 } 1337 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1338 *norm = sqrt(*norm); 1339 } else if (type == NORM_1) { /* max column norm */ 1340 PetscReal *tmp,*tmp2; 1341 PetscInt *jj,*garray = aij->garray; 1342 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1343 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1344 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1345 *norm = 0.0; 1346 v = amat->a; jj = amat->j; 1347 for (j=0; j<amat->nz; j++) { 1348 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1349 } 1350 v = bmat->a; jj = bmat->j; 1351 for (j=0; j<bmat->nz; j++) { 1352 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1353 } 1354 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1355 for (j=0; j<mat->N; j++) { 1356 if (tmp2[j] > *norm) *norm = tmp2[j]; 1357 } 1358 ierr = PetscFree(tmp);CHKERRQ(ierr); 1359 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1360 } else if (type == NORM_INFINITY) { /* max row norm */ 1361 PetscReal ntemp = 0.0; 1362 for (j=0; j<aij->A->m; j++) { 1363 v = amat->a + amat->i[j]; 1364 sum = 0.0; 1365 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1366 sum += PetscAbsScalar(*v); v++; 1367 } 1368 v = bmat->a + bmat->i[j]; 1369 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1370 sum += PetscAbsScalar(*v); v++; 1371 } 1372 if (sum > ntemp) ntemp = sum; 1373 } 1374 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1375 } else { 1376 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1377 } 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatTranspose_MPIAIJ" 1384 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1385 { 1386 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1387 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1388 PetscErrorCode ierr; 1389 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1390 Mat B; 1391 PetscScalar *array; 1392 1393 PetscFunctionBegin; 1394 if (!matout && M != N) { 1395 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1396 } 1397 1398 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1399 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1400 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1401 1402 /* copy over the A part */ 1403 Aloc = (Mat_SeqAIJ*)a->A->data; 1404 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1405 row = a->rstart; 1406 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1407 for (i=0; i<m; i++) { 1408 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1409 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1410 } 1411 aj = Aloc->j; 1412 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1413 1414 /* copy over the B part */ 1415 Aloc = (Mat_SeqAIJ*)a->B->data; 1416 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1417 row = a->rstart; 1418 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1419 ct = cols; 1420 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1421 for (i=0; i<m; i++) { 1422 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1423 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1424 } 1425 ierr = PetscFree(ct);CHKERRQ(ierr); 1426 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1427 ierr = 
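/*
      The transpose above is assembled by re-inserting each local row as a column of the
      result: the diagonal-block column indices are temporarily shifted to global numbering
      (aj[i] += cstart) and restored afterwards, the off-diagonal ones are translated through
      garray, and MatSetValues(B,ncols,cols,1,&row,array,INSERT_VALUES) with the row/column
      arguments swapped performs the transposition; the stash then moves entries to the
      processes that own the transposed rows.  Out-of-place usage, sketched:

         Mat At;
         ierr = MatTranspose(A,&At);CHKERRQ(ierr);

      With a PETSC_NULL second argument the result replaces A in place via MatHeaderCopy(),
      which the check at the top restricts to square matrices.
*/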
MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 if (matout) { 1429 *matout = B; 1430 } else { 1431 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1432 } 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1438 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1441 Mat a = aij->A,b = aij->B; 1442 PetscErrorCode ierr; 1443 PetscInt s1,s2,s3; 1444 1445 PetscFunctionBegin; 1446 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1447 if (rr) { 1448 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1449 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1450 /* Overlap communication with computation. */ 1451 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1452 } 1453 if (ll) { 1454 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1455 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1456 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1457 } 1458 /* scale the diagonal block */ 1459 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1460 1461 if (rr) { 1462 /* Do a scatter end and then right scale the off-diagonal block */ 1463 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1464 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1465 } 1466 1467 PetscFunctionReturn(0); 1468 } 1469 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1473 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1474 { 1475 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 1478 PetscFunctionBegin; 1479 if (!a->rank) { 1480 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1487 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,PetscInt *bs) 1488 { 1489 PetscFunctionBegin; 1490 *bs = 1; 1491 PetscFunctionReturn(0); 1492 } 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1495 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1496 { 1497 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1498 PetscErrorCode ierr; 1499 1500 PetscFunctionBegin; 1501 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatEqual_MPIAIJ" 1507 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1508 { 1509 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1510 Mat a,b,c,d; 1511 PetscTruth flg; 1512 PetscErrorCode ierr; 1513 1514 PetscFunctionBegin; 1515 a = matA->A; b = matA->B; 1516 c = matB->A; d = matB->B; 1517 1518 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1519 if (flg == PETSC_TRUE) { 1520 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1521 } 1522 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatCopy_MPIAIJ" 1528 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1529 { 1530 PetscErrorCode ierr; 1531 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1532 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1533 1534 PetscFunctionBegin; 1535 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1536 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1537 /* because of the column compression in the off-processor part of the matrix a->B, 1538 the number of columns in a->B and b->B may be different, hence we cannot call 1539 the MatCopy() directly on the two parts. If need be, we can provide a more 1540 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1541 then copying the submatrices */ 1542 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1543 } else { 1544 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1545 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1546 } 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1552 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1553 { 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #include "petscblaslapack.h" 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatAXPY_MPIAIJ" 1564 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1565 { 1566 PetscErrorCode ierr; 1567 PetscInt i; 1568 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1569 PetscBLASInt bnz,one=1; 1570 Mat_SeqAIJ *x,*y; 1571 1572 PetscFunctionBegin; 1573 if (str == SAME_NONZERO_PATTERN) { 1574 x = (Mat_SeqAIJ *)xx->A->data; 1575 y = (Mat_SeqAIJ *)yy->A->data; 1576 bnz = (PetscBLASInt)x->nz; 1577 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1578 x = (Mat_SeqAIJ *)xx->B->data; 1579 y = (Mat_SeqAIJ *)yy->B->data; 1580 bnz = (PetscBLASInt)x->nz; 1581 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1582 } else if (str == SUBSET_NONZERO_PATTERN) { 1583 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1584 1585 x = (Mat_SeqAIJ *)xx->B->data; 1586 y = (Mat_SeqAIJ *)yy->B->data; 1587 if (y->xtoy && y->XtoY != xx->B) { 1588 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1589 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1590 } 1591 if (!y->xtoy) { /* get xtoy */ 1592 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1593 y->XtoY = xx->B; 1594 } 1595 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1596 } else { 1597 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 /* -------------------------------------------------------------------*/ 1603 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1604 MatGetRow_MPIAIJ, 1605 MatRestoreRow_MPIAIJ, 1606 MatMult_MPIAIJ, 1607 /* 4*/ MatMultAdd_MPIAIJ, 1608 MatMultTranspose_MPIAIJ, 1609 MatMultTransposeAdd_MPIAIJ, 1610 0, 1611 0, 1612 0, 1613 /*10*/ 0, 1614 0, 1615 0, 1616 MatRelax_MPIAIJ, 1617 MatTranspose_MPIAIJ, 1618 /*15*/ MatGetInfo_MPIAIJ, 1619 MatEqual_MPIAIJ, 1620 MatGetDiagonal_MPIAIJ, 1621 MatDiagonalScale_MPIAIJ, 1622 MatNorm_MPIAIJ, 1623 /*20*/ MatAssemblyBegin_MPIAIJ, 1624 MatAssemblyEnd_MPIAIJ, 1625 0, 1626 MatSetOption_MPIAIJ, 1627 MatZeroEntries_MPIAIJ, 1628 /*25*/ MatZeroRows_MPIAIJ, 1629 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_64BIT_INT) 1630 MatLUFactorSymbolic_MPIAIJ_TFS, 1631 #else 1632 0, 1633 #endif 1634 0, 1635 0, 1636 0, 1637 /*30*/ MatSetUpPreallocation_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 0, 1642 /*35*/ MatDuplicate_MPIAIJ, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*40*/ MatAXPY_MPIAIJ, 1648 MatGetSubMatrices_MPIAIJ, 1649 MatIncreaseOverlap_MPIAIJ, 1650 
MatGetValues_MPIAIJ, 1651 MatCopy_MPIAIJ, 1652 /*45*/ MatPrintHelp_MPIAIJ, 1653 MatScale_MPIAIJ, 1654 0, 1655 0, 1656 0, 1657 /*50*/ MatGetBlockSize_MPIAIJ, 1658 0, 1659 0, 1660 0, 1661 0, 1662 /*55*/ MatFDColoringCreate_MPIAIJ, 1663 0, 1664 MatSetUnfactored_MPIAIJ, 1665 0, 1666 0, 1667 /*60*/ MatGetSubMatrix_MPIAIJ, 1668 MatDestroy_MPIAIJ, 1669 MatView_MPIAIJ, 1670 MatGetPetscMaps_Petsc, 1671 0, 1672 /*65*/ 0, 1673 0, 1674 0, 1675 0, 1676 0, 1677 /*70*/ 0, 1678 0, 1679 MatSetColoring_MPIAIJ, 1680 MatSetValuesAdic_MPIAIJ, 1681 MatSetValuesAdifor_MPIAIJ, 1682 /*75*/ 0, 1683 0, 1684 0, 1685 0, 1686 0, 1687 /*80*/ 0, 1688 0, 1689 0, 1690 0, 1691 /*84*/ MatLoad_MPIAIJ, 1692 0, 1693 0, 1694 0, 1695 0, 1696 0, 1697 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1698 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1699 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1700 MatPtAP_MPIAIJ_MPIAIJ, 1701 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1702 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1703 0, 1704 0, 1705 0}; 1706 1707 /* ----------------------------------------------------------------------------------------*/ 1708 1709 EXTERN_C_BEGIN 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1712 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1713 { 1714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1719 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 EXTERN_C_END 1723 1724 EXTERN_C_BEGIN 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1727 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1728 { 1729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1734 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 EXTERN_C_END 1738 1739 #include "petscpc.h" 1740 EXTERN_C_BEGIN 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1743 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1744 { 1745 Mat_MPIAIJ *b; 1746 PetscErrorCode ierr; 1747 PetscInt i; 1748 1749 PetscFunctionBegin; 1750 B->preallocated = PETSC_TRUE; 1751 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1752 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1753 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1754 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1755 if (d_nnz) { 1756 for (i=0; i<B->m; i++) { 1757 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1758 } 1759 } 1760 if (o_nnz) { 1761 for (i=0; i<B->m; i++) { 1762 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1763 } 1764 } 1765 b = (Mat_MPIAIJ*)B->data; 1766 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1767 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1768 1769 PetscFunctionReturn(0); 1770 } 1771 EXTERN_C_END 1772 1773 /*MC 1774 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1775 1776 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1777 and MATMPIAIJ otherwise. 
As a result, for single process communicators, 1778 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1779 for communicators controlling multiple processes. It is recommended that you call both of 1780 the above preallocation routines for simplicity. 1781 1782 Options Database Keys: 1783 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1784 1785 Level: beginner 1786 1787 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1788 M*/ 1789 1790 EXTERN_C_BEGIN 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "MatCreate_AIJ" 1793 PetscErrorCode MatCreate_AIJ(Mat A) 1794 { 1795 PetscErrorCode ierr; 1796 PetscMPIInt size; 1797 1798 PetscFunctionBegin; 1799 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1800 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1801 if (size == 1) { 1802 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1803 } else { 1804 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1805 } 1806 PetscFunctionReturn(0); 1807 } 1808 EXTERN_C_END 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1812 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1813 { 1814 Mat mat; 1815 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1816 PetscErrorCode ierr; 1817 1818 PetscFunctionBegin; 1819 *newmat = 0; 1820 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1821 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1822 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1823 a = (Mat_MPIAIJ*)mat->data; 1824 1825 mat->factor = matin->factor; 1826 mat->assembled = PETSC_TRUE; 1827 mat->insertmode = NOT_SET_VALUES; 1828 mat->preallocated = PETSC_TRUE; 1829 1830 a->rstart = oldmat->rstart; 1831 a->rend = oldmat->rend; 1832 a->cstart = oldmat->cstart; 1833 a->cend = oldmat->cend; 1834 a->size = oldmat->size; 1835 a->rank = oldmat->rank; 1836 a->donotstash = oldmat->donotstash; 1837 a->roworiented = oldmat->roworiented; 1838 a->rowindices = 0; 1839 a->rowvalues = 0; 1840 a->getrowactive = PETSC_FALSE; 1841 1842 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1843 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1844 if (oldmat->colmap) { 1845 #if defined (PETSC_USE_CTABLE) 1846 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1847 #else 1848 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1849 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1850 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1851 #endif 1852 } else a->colmap = 0; 1853 if (oldmat->garray) { 1854 PetscInt len; 1855 len = oldmat->B->n; 1856 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1857 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1858 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1859 } else a->garray = 0; 1860 1861 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1862 PetscLogObjectParent(mat,a->lvec); 1863 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1864 PetscLogObjectParent(mat,a->Mvctx); 1865 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1866 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1867 PetscLogObjectParent(mat,a->A); 1868 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1869 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1870 
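/* the diagonal (A) and off-diagonal (B) blocks have been duplicated above, so the new matrix shares no matrix storage with the old one */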
PetscLogObjectParent(mat,a->B); 1871 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1872 *newmat = mat; 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #include "petscsys.h" 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatLoad_MPIAIJ" 1880 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1881 { 1882 Mat A; 1883 PetscScalar *vals,*svals; 1884 MPI_Comm comm = ((PetscObject)viewer)->comm; 1885 MPI_Status status; 1886 PetscErrorCode ierr; 1887 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz,mm; 1888 PetscInt i,nz,j,rstart,rend; 1889 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1890 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1891 PetscInt cend,cstart,n; 1892 int fd; 1893 1894 PetscFunctionBegin; 1895 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1896 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1897 if (!rank) { 1898 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1899 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1900 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1901 if (header[3] < 0) { 1902 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1903 } 1904 } 1905 1906 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1907 M = header[1]; N = header[2]; 1908 /* determine ownership of all rows */ 1909 m = M/size + ((M % size) > rank); 1910 mm = (PetscMPIInt) m; 1911 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1912 ierr = MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1913 rowners[0] = 0; 1914 for (i=2; i<=size; i++) { 1915 rowners[i] += rowners[i-1]; 1916 } 1917 rstart = rowners[rank]; 1918 rend = rowners[rank+1]; 1919 1920 /* distribute row lengths to all processors */ 1921 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1922 offlens = ourlens + (rend-rstart); 1923 if (!rank) { 1924 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1925 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1926 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1927 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1928 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1929 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1930 } else { 1931 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1932 } 1933 1934 if (!rank) { 1935 /* calculate the number of nonzeros on each processor */ 1936 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1937 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1938 for (i=0; i<size; i++) { 1939 for (j=rowners[i]; j< rowners[i+1]; j++) { 1940 procsnz[i] += rowlengths[j]; 1941 } 1942 } 1943 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1944 1945 /* determine max buffer needed and allocate it */ 1946 maxnz = 0; 1947 for (i=0; i<size; i++) { 1948 maxnz = PetscMax(maxnz,procsnz[i]); 1949 } 1950 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1951 1952 /* read in my part of the matrix column indices */ 1953 nz = procsnz[0]; 1954 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1955 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1956 1957 /* read in every one elses and ship off */ 1958 for (i=1; i<size; i++) { 1959 nz = procsnz[i]; 1960 
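/* read the column indices destined for process i into the scratch buffer and ship them off; rank 0 keeps only its own indices in mycols */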
ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1961 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1962 } 1963 ierr = PetscFree(cols);CHKERRQ(ierr); 1964 } else { 1965 /* determine buffer space needed for message */ 1966 nz = 0; 1967 for (i=0; i<m; i++) { 1968 nz += ourlens[i]; 1969 } 1970 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1971 1972 /* receive message of column indices*/ 1973 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1974 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1975 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1976 } 1977 1978 /* determine column ownership if matrix is not square */ 1979 if (N != M) { 1980 n = N/size + ((N % size) > rank); 1981 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1982 cstart = cend - n; 1983 } else { 1984 cstart = rstart; 1985 cend = rend; 1986 n = cend - cstart; 1987 } 1988 1989 /* loop over local rows, determining number of off diagonal entries */ 1990 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1991 jj = 0; 1992 for (i=0; i<m; i++) { 1993 for (j=0; j<ourlens[i]; j++) { 1994 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1995 jj++; 1996 } 1997 } 1998 1999 /* create our matrix */ 2000 for (i=0; i<m; i++) { 2001 ourlens[i] -= offlens[i]; 2002 } 2003 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2004 ierr = MatSetType(A,type);CHKERRQ(ierr); 2005 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2006 2007 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2008 for (i=0; i<m; i++) { 2009 ourlens[i] += offlens[i]; 2010 } 2011 2012 if (!rank) { 2013 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2014 2015 /* read in my part of the matrix numerical values */ 2016 nz = procsnz[0]; 2017 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2018 2019 /* insert into matrix */ 2020 jj = rstart; 2021 smycols = mycols; 2022 svals = vals; 2023 for (i=0; i<m; i++) { 2024 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2025 smycols += ourlens[i]; 2026 svals += ourlens[i]; 2027 jj++; 2028 } 2029 2030 /* read in other processors and ship out */ 2031 for (i=1; i<size; i++) { 2032 nz = procsnz[i]; 2033 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2034 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2035 } 2036 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2037 } else { 2038 /* receive numeric values */ 2039 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2040 2041 /* receive message of values*/ 2042 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2043 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2044 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2045 2046 /* insert into matrix */ 2047 jj = rstart; 2048 smycols = mycols; 2049 svals = vals; 2050 for (i=0; i<m; i++) { 2051 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2052 smycols += ourlens[i]; 2053 svals += ourlens[i]; 2054 jj++; 2055 } 2056 } 2057 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2058 ierr = PetscFree(vals);CHKERRQ(ierr); 2059 ierr = PetscFree(mycols);CHKERRQ(ierr); 2060 ierr = PetscFree(rowners);CHKERRQ(ierr); 2061 2062 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2064 *newmat = A; 2065 
PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2070 /* 2071 Not great since it makes two copies of the submatrix, first an SeqAIJ 2072 in local and then by concatenating the local matrices the end result. 2073 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2074 */ 2075 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2076 { 2077 PetscErrorCode ierr; 2078 PetscMPIInt rank,size; 2079 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2080 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2081 Mat *local,M,Mreuse; 2082 PetscScalar *vwork,*aa; 2083 MPI_Comm comm = mat->comm; 2084 Mat_SeqAIJ *aij; 2085 2086 2087 PetscFunctionBegin; 2088 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2089 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2090 2091 if (call == MAT_REUSE_MATRIX) { 2092 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2093 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2094 local = &Mreuse; 2095 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2096 } else { 2097 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2098 Mreuse = *local; 2099 ierr = PetscFree(local);CHKERRQ(ierr); 2100 } 2101 2102 /* 2103 m - number of local rows 2104 n - number of columns (same on all processors) 2105 rstart - first row in new global matrix generated 2106 */ 2107 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2108 if (call == MAT_INITIAL_MATRIX) { 2109 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2110 ii = aij->i; 2111 jj = aij->j; 2112 2113 /* 2114 Determine the number of non-zeros in the diagonal and off-diagonal 2115 portions of the matrix in order to do correct preallocation 2116 */ 2117 2118 /* first get start and end of "diagonal" columns */ 2119 if (csize == PETSC_DECIDE) { 2120 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2121 if (mglobal == n) { /* square matrix */ 2122 nlocal = m; 2123 } else { 2124 nlocal = n/size + ((n % size) > rank); 2125 } 2126 } else { 2127 nlocal = csize; 2128 } 2129 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2130 rstart = rend - nlocal; 2131 if (rank == size - 1 && rend != n) { 2132 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2133 } 2134 2135 /* next, compute all the lengths */ 2136 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2137 olens = dlens + m; 2138 for (i=0; i<m; i++) { 2139 jend = ii[i+1] - ii[i]; 2140 olen = 0; 2141 dlen = 0; 2142 for (j=0; j<jend; j++) { 2143 if (*jj < rstart || *jj >= rend) olen++; 2144 else dlen++; 2145 jj++; 2146 } 2147 olens[i] = olen; 2148 dlens[i] = dlen; 2149 } 2150 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2151 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2152 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2153 ierr = PetscFree(dlens);CHKERRQ(ierr); 2154 } else { 2155 PetscInt ml,nl; 2156 2157 M = *newmat; 2158 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2159 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2160 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2161 /* 2162 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2163 rather than the slower MatSetValues(). 
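Marking the matrix as not assembled (while remembering that it has been assembled before) puts it back into a state that accepts new entries.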
2164 */ 2165 M->was_assembled = PETSC_TRUE; 2166 M->assembled = PETSC_FALSE; 2167 } 2168 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2169 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2170 ii = aij->i; 2171 jj = aij->j; 2172 aa = aij->a; 2173 for (i=0; i<m; i++) { 2174 row = rstart + i; 2175 nz = ii[i+1] - ii[i]; 2176 cwork = jj; jj += nz; 2177 vwork = aa; aa += nz; 2178 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2179 } 2180 2181 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2182 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 *newmat = M; 2184 2185 /* save submatrix used in processor for next request */ 2186 if (call == MAT_INITIAL_MATRIX) { 2187 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2188 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2189 } 2190 2191 PetscFunctionReturn(0); 2192 } 2193 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2196 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2197 { 2198 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2199 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2200 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2201 const PetscInt *JJ; 2202 PetscScalar *values; 2203 PetscErrorCode ierr; 2204 2205 PetscFunctionBegin; 2206 #if defined(PETSC_OPT_g) 2207 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2208 #endif 2209 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2210 o_nnz = d_nnz + m; 2211 2212 for (i=0; i<m; i++) { 2213 nnz = I[i+1]- I[i]; 2214 JJ = J + I[i]; 2215 nnz_max = PetscMax(nnz_max,nnz); 2216 #if defined(PETSC_OPT_g) 2217 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2218 #endif 2219 for (j=0; j<nnz; j++) { 2220 if (*JJ >= cstart) break; 2221 JJ++; 2222 } 2223 d = 0; 2224 for (; j<nnz; j++) { 2225 if (*JJ++ >= cend) break; 2226 d++; 2227 } 2228 d_nnz[i] = d; 2229 o_nnz[i] = nnz - d; 2230 } 2231 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2232 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2233 2234 if (v) values = (PetscScalar*)v; 2235 else { 2236 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2237 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2238 } 2239 2240 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2241 for (i=0; i<m; i++) { 2242 ii = i + rstart; 2243 nnz = I[i+1]- I[i]; 2244 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2245 } 2246 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2247 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2248 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2249 2250 if (!v) { 2251 ierr = PetscFree(values);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2258 /*@C 2259 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2260 (the default parallel PETSc format). 2261 2262 Collective on MPI_Comm 2263 2264 Input Parameters: 2265 + A - the matrix 2266 . i - the indices into j for the start of each local row (starts with zero) 2267 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2268 - v - optional values in the matrix 2269 2270 Level: developer 2271 2272 .keywords: matrix, aij, compressed row, sparse, parallel 2273 2274 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2275 @*/ 2276 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2277 { 2278 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2279 2280 PetscFunctionBegin; 2281 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2282 if (f) { 2283 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2284 } 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2290 /*@C 2291 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2292 (the default parallel PETSc format). For good matrix assembly performance 2293 the user should preallocate the matrix storage by setting the parameters 2294 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2295 performance can be increased by more than a factor of 50. 2296 2297 Collective on MPI_Comm 2298 2299 Input Parameters: 2300 + A - the matrix 2301 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2302 (same value is used for all local rows) 2303 . d_nnz - array containing the number of nonzeros in the various rows of the 2304 DIAGONAL portion of the local submatrix (possibly different for each row) 2305 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2306 The size of this array is equal to the number of local rows, i.e 'm'. 2307 You must leave room for the diagonal entry even if it is zero. 2308 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2309 submatrix (same value is used for all local rows). 2310 - o_nnz - array containing the number of nonzeros in the various rows of the 2311 OFF-DIAGONAL portion of the local submatrix (possibly different for 2312 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2313 structure. The size of this array is equal to the number 2314 of local rows, i.e 'm'. 2315 2316 The AIJ format (also called the Yale sparse matrix format or 2317 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2318 storage. The stored row and column indices begin with zero. See the users manual for details. 2319 2320 The parallel matrix is partitioned such that the first m0 rows belong to 2321 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2322 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2323 2324 The DIAGONAL portion of the local submatrix of a processor can be defined 2325 as the submatrix which is obtained by extraction the part corresponding 2326 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2327 first row that belongs to the processor, and r2 is the last row belonging 2328 to the this processor. This is a square mxm matrix. The remaining portion 2329 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2330 2331 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2332 2333 Example usage: 2334 2335 Consider the following 8x8 matrix with 34 non-zero values, that is 2336 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows, 2337 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2338 as follows: 2339 2340 .vb 2341 1 2 0 | 0 3 0 | 0 4 2342 Proc0 0 5 6 | 7 0 0 | 8 0 2343 9 0 10 | 11 0 0 | 12 0 2344 ------------------------------------- 2345 13 0 14 | 15 16 17 | 0 0 2346 Proc1 0 18 0 | 19 20 21 | 0 0 2347 0 0 0 | 22 23 0 | 24 0 2348 ------------------------------------- 2349 Proc2 25 26 27 | 0 0 28 | 29 0 2350 30 0 0 | 31 32 33 | 0 34 2351 .ve 2352 2353 This can be represented as a collection of submatrices as: 2354 2355 .vb 2356 A B C 2357 D E F 2358 G H I 2359 .ve 2360 2361 Where the submatrices A,B,C are owned by proc0, D,E,F are 2362 owned by proc1, G,H,I are owned by proc2. 2363 2364 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2365 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2366 The 'M','N' parameters are 8,8, and have the same values on all procs. 2367 2368 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2369 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2370 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2371 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2372 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2373 matrix, and [DF] as another SeqAIJ matrix. 2374 2375 When d_nz, o_nz parameters are specified, d_nz storage elements are 2376 allocated for every row of the local diagonal submatrix, and o_nz 2377 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2378 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2379 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2380 In this case, the values of d_nz,o_nz are: 2381 .vb 2382 proc0 : d_nz = 2, o_nz = 2 2383 proc1 : d_nz = 3, o_nz = 2 2384 proc2 : d_nz = 1, o_nz = 4 2385 .ve 2386 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2387 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2388 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2389 34 values. 2390 2391 When d_nnz, o_nnz parameters are specified, the storage is specified 2392 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2393 In the above case the values for d_nnz,o_nnz are: 2394 .vb 2395 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2396 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2397 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2398 .ve 2399 Here the space allocated is the sum of all the above values, i.e. 34, and 2400 hence the pre-allocation is perfect. 2401 2402 Level: intermediate 2403 2404 .keywords: matrix, aij, compressed row, sparse, parallel 2405 2406 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2407 MPIAIJ 2408 @*/ 2409 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2410 { 2411 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2412 2413 PetscFunctionBegin; 2414 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2415 if (f) { 2416 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2417 } 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatCreateMPIAIJ" 2423 /*@C 2424 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2425 (the default parallel PETSc format). For good matrix assembly performance 2426 the user should preallocate the matrix storage by setting the parameters 2427 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2428 performance can be increased by more than a factor of 50. 2429 2430 Collective on MPI_Comm 2431 2432 Input Parameters: 2433 + comm - MPI communicator 2434 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given); 2435 this value should be the same as the local size used in creating the 2436 y vector for the matrix-vector product y = Ax. 2437 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given); 2438 this value should be the same as the local size used in creating the 2439 x vector for the matrix-vector product y = Ax. For square matrices n is almost always m. 2440 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2441 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2442 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2443 (same value is used for all local rows) 2444 . d_nnz - array containing the number of nonzeros in the various rows of the 2445 DIAGONAL portion of the local submatrix (possibly different for each row) 2446 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2447 The size of this array is equal to the number of local rows, i.e. 'm'. 2448 You must leave room for the diagonal entry even if it is zero. 2449 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2450 submatrix (same value is used for all local rows). 2451 - o_nnz - array containing the number of nonzeros in the various rows of the 2452 OFF-DIAGONAL portion of the local submatrix (possibly different for 2453 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2454 structure. The size of this array is equal to the number 2455 of local rows, i.e. 'm'. 2456 2457 Output Parameter: 2458 . A - the matrix 2459 2460 Notes: 2461 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2462 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2463 storage requirements for this matrix. 2464 2465 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2466 processor then it must be used on all processors that share the object for 2467 that argument. 2468 2469 The user MUST specify either the local or global matrix dimensions 2470 (possibly both). 2471 2472 The parallel matrix is partitioned such that the first m0 rows belong to 2473 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2474 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2475 2476 The DIAGONAL portion of the local submatrix of a processor can be defined 2477 as the submatrix which is obtained by extracting the part corresponding 2478 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2479 first row that belongs to the processor, and r2 is the last row belonging 2480 to this processor. This is a square mxm matrix. The remaining portion 2481 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2482 2483 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2484 2485 When calling this routine with a single process communicator, a matrix of 2486 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2487 type of communicator, use the construction mechanism: 2488 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2489 2490 By default, this format uses inodes (identical nodes) when possible. 2491 We search for consecutive rows with the same nonzero structure, thereby 2492 reusing matrix information to achieve increased efficiency. 2493 2494 Options Database Keys: 2495 + -mat_aij_no_inode - Do not use inodes 2496 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2497 - -mat_aij_oneindex - Internally use indexing starting at 1 2498 rather than 0. Note that when calling MatSetValues(), 2499 the user still MUST index entries starting at 0! 2500 2501 2502 Example usage: 2503 2504 Consider the following 8x8 matrix with 34 non-zero values, that is 2505 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2506 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2507 as follows: 2508 2509 .vb 2510 1 2 0 | 0 3 0 | 0 4 2511 Proc0 0 5 6 | 7 0 0 | 8 0 2512 9 0 10 | 11 0 0 | 12 0 2513 ------------------------------------- 2514 13 0 14 | 15 16 17 | 0 0 2515 Proc1 0 18 0 | 19 20 21 | 0 0 2516 0 0 0 | 22 23 0 | 24 0 2517 ------------------------------------- 2518 Proc2 25 26 27 | 0 0 28 | 29 0 2519 30 0 0 | 31 32 33 | 0 34 2520 .ve 2521 2522 This can be represented as a collection of submatrices as: 2523 2524 .vb 2525 A B C 2526 D E F 2527 G H I 2528 .ve 2529 2530 Where the submatrices A,B,C are owned by proc0, D,E,F are 2531 owned by proc1, G,H,I are owned by proc2. 2532 2533 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2534 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2535 The 'M','N' parameters are 8,8, and have the same values on all procs. 2536 2537 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2538 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2539 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2540 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2541 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2542 matrix, and [DF] as another SeqAIJ matrix. 2543 2544 When d_nz, o_nz parameters are specified, d_nz storage elements are 2545 allocated for every row of the local diagonal submatrix, and o_nz 2546 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2547 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2548 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2549 In this case, the values of d_nz,o_nz are: 2550 .vb 2551 proc0 : d_nz = 2, o_nz = 2 2552 proc1 : d_nz = 3, o_nz = 2 2553 proc2 : d_nz = 1, o_nz = 4 2554 .ve 2555 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2556 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2557 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2558 34 values. 2559 2560 When d_nnz, o_nnz parameters are specified, the storage is specified 2561 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2562 In the above case the values for d_nnz,o_nnz are: 2563 .vb 2564 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2565 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2566 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2567 .ve 2568 Here the space allocated is the sum of all the above values, i.e. 34, and 2569 hence the pre-allocation is perfect.
2570 2571 Level: intermediate 2572 2573 .keywords: matrix, aij, compressed row, sparse, parallel 2574 2575 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2576 MPIAIJ 2577 @*/ 2578 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2579 { 2580 PetscErrorCode ierr; 2581 PetscMPIInt size; 2582 2583 PetscFunctionBegin; 2584 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2586 if (size > 1) { 2587 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2588 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2589 } else { 2590 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2591 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2592 } 2593 PetscFunctionReturn(0); 2594 } 2595 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2598 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2599 { 2600 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2601 2602 PetscFunctionBegin; 2603 *Ad = a->A; 2604 *Ao = a->B; 2605 *colmap = a->garray; 2606 PetscFunctionReturn(0); 2607 } 2608 2609 #undef __FUNCT__ 2610 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2611 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2612 { 2613 PetscErrorCode ierr; 2614 PetscInt i; 2615 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2616 2617 PetscFunctionBegin; 2618 if (coloring->ctype == IS_COLORING_LOCAL) { 2619 ISColoringValue *allcolors,*colors; 2620 ISColoring ocoloring; 2621 2622 /* set coloring for diagonal portion */ 2623 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2624 2625 /* set coloring for off-diagonal portion */ 2626 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2627 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2628 for (i=0; i<a->B->n; i++) { 2629 colors[i] = allcolors[a->garray[i]]; 2630 } 2631 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2632 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2633 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2634 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2635 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2636 ISColoringValue *colors; 2637 PetscInt *larray; 2638 ISColoring ocoloring; 2639 2640 /* set coloring for diagonal portion */ 2641 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2642 for (i=0; i<a->A->n; i++) { 2643 larray[i] = i + a->cstart; 2644 } 2645 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2646 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2647 for (i=0; i<a->A->n; i++) { 2648 colors[i] = coloring->colors[larray[i]]; 2649 } 2650 ierr = PetscFree(larray);CHKERRQ(ierr); 2651 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2652 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2653 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2654 2655 /* set coloring for off-diagonal portion */ 2656 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2657 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2658 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 
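/* pick up the color of each off-diagonal column through the ghosted local index computed above */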
2659 for (i=0; i<a->B->n; i++) { 2660 colors[i] = coloring->colors[larray[i]]; 2661 } 2662 ierr = PetscFree(larray);CHKERRQ(ierr); 2663 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2664 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2665 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2666 } else { 2667 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2668 } 2669 2670 PetscFunctionReturn(0); 2671 } 2672 2673 #undef __FUNCT__ 2674 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2675 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2676 { 2677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2678 PetscErrorCode ierr; 2679 2680 PetscFunctionBegin; 2681 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2682 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2683 PetscFunctionReturn(0); 2684 } 2685 2686 #undef __FUNCT__ 2687 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2688 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2689 { 2690 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2691 PetscErrorCode ierr; 2692 2693 PetscFunctionBegin; 2694 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2695 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2696 PetscFunctionReturn(0); 2697 } 2698 2699 #undef __FUNCT__ 2700 #define __FUNCT__ "MatMerge" 2701 /*@C 2702 MatMerge - Creates a single large PETSc matrix by concatenating the sequential 2703 matrices from each processor 2704 2705 Collective on MPI_Comm 2706 2707 Input Parameters: 2708 + comm - the communicator the parallel matrix will live on 2709 . inmat - the input sequential matrix (one per process) 2710 . n - number of local columns (or PETSC_DECIDE) 2711 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2712 2713 Output Parameter: 2714 . outmat - the parallel matrix generated 2715 2716 Level: advanced 2717 2718 Notes: The number of columns of the matrix on EACH processor MUST be the same.
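   A rough calling sketch (Aloc stands for the local sequential matrix and is a
   placeholder name; error checking is omitted):
.vb
      Mat C;
      ierr = MatMerge(comm,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
   Also note that inmat is destroyed by this routine once its rows have been
   copied into outmat.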
2719 2720 @*/ 2721 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2722 { 2723 PetscErrorCode ierr; 2724 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2725 const PetscInt *indx; 2726 const PetscScalar *values; 2727 PetscMap columnmap,rowmap; 2728 2729 PetscFunctionBegin; 2730 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2731 /* 2732 PetscMPIInt rank; 2733 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2734 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2735 */ 2736 if (scall == MAT_INITIAL_MATRIX){ 2737 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2738 if (n == PETSC_DECIDE){ 2739 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2740 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2741 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2742 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2743 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2744 } 2745 2746 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2747 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2748 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2749 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2750 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2751 2752 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2753 for (i=0;i<m;i++) { 2754 ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2755 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2756 ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2757 } 2758 /* This routine will ONLY return MPIAIJ type matrix */ 2759 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2760 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2761 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2762 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2763 2764 } else if (scall == MAT_REUSE_MATRIX){ 2765 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2766 } else { 2767 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2768 } 2769 2770 for (i=0;i<m;i++) { 2771 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2772 I = i + rstart; 2773 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2774 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2775 } 2776 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2777 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2778 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2779 2780 PetscFunctionReturn(0); 2781 } 2782 2783 #undef __FUNCT__ 2784 #define __FUNCT__ "MatFileSplit" 2785 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2786 { 2787 PetscErrorCode ierr; 2788 PetscMPIInt rank; 2789 PetscInt m,N,i,rstart,nnz; 2790 size_t len; 2791 const PetscInt *indx; 2792 PetscViewer out; 2793 char *name; 2794 Mat B; 2795 const PetscScalar *values; 2796 2797 PetscFunctionBegin; 2798 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2799 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2800 /* Should this be the type of the diagonal block of A? 
*/ 2801 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2802 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2803 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2804 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2805 for (i=0;i<m;i++) { 2806 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2807 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2808 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2809 } 2810 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 2813 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2814 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2815 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2816 sprintf(name,"%s.%d",outfile,rank); 2817 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2818 ierr = PetscFree(name);CHKERRQ(ierr); 2819 ierr = MatView(B,out);CHKERRQ(ierr); 2820 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2821 ierr = MatDestroy(B);CHKERRQ(ierr); 2822 PetscFunctionReturn(0); 2823 } 2824 2825 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2826 #undef __FUNCT__ 2827 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2828 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2829 { 2830 PetscErrorCode ierr; 2831 Mat_Merge_SeqsToMPI *merge; 2832 PetscObjectContainer container; 2833 2834 PetscFunctionBegin; 2835 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2836 if (container) { 2837 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2838 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2839 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2840 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2841 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2842 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2843 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2844 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2845 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2846 if (merge->ci){ierr = PetscFree(merge->ci);CHKERRQ(ierr);} 2847 if (merge->cj){ierr = PetscFree(merge->cj);CHKERRQ(ierr);} 2848 if (merge->C_seq){ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr);} 2849 2850 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2851 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2852 /* free the merge context only when it was actually obtained from the container */ ierr = PetscFree(merge);CHKERRQ(ierr); 2853 } 2854 2855 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 #include "src/mat/utils/freespace.h" 2860 #include "petscbt.h" 2861 #undef __FUNCT__ 2862 #define __FUNCT__ "MatMerge_SeqsToMPI" 2863 /*@C 2864 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential 2865 matrices from each processor 2866 2867 Collective on MPI_Comm 2868 2869 Input Parameters: 2870 + comm - the communicator the parallel matrix will live on 2871 . seqmat - the input sequential matrix (one per process) 2872 . m - number of local rows (or PETSC_DECIDE) 2873 . n - number of local columns (or PETSC_DECIDE) 2874 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2875 2876 Output Parameter: 2877 . mpimat - the parallel matrix generated 2878 2879 Level: advanced 2880 2881 Notes: 2882 The dimensions of the sequential matrix in each processor MUST be the same. 2883 The input seqmat is stored inside the container "Mat_Merge_SeqsToMPI", and will be 2884 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
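   The merge is performed in a symbolic phase followed by a numeric phase (the two
   routines below); a possible calling sequence, shown only as a sketch with
   placeholder arguments, is
.vb
      Mat C;
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&C);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,C);
.ve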
2885 @*/ 2886 static PetscEvent logkey_seqstompinum = 0; 2887 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2888 { 2889 PetscErrorCode ierr; 2890 MPI_Comm comm=mpimat->comm; 2891 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2892 PetscMPIInt size,rank,taga,*len_s; 2893 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2894 PetscInt proc,m; 2895 PetscInt **buf_ri,**buf_rj; 2896 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2897 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2898 MPI_Request *s_waits,*r_waits; 2899 MPI_Status *status; 2900 MatScalar *aa=a->a,**abuf_r,*ba_i; 2901 Mat_Merge_SeqsToMPI *merge; 2902 PetscObjectContainer container; 2903 2904 PetscFunctionBegin; 2905 if (!logkey_seqstompinum) { 2906 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2907 } 2908 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2909 2910 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2911 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2912 2913 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2914 if (container) { 2915 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2916 } 2917 bi = merge->bi; 2918 bj = merge->bj; 2919 buf_ri = merge->buf_ri; 2920 buf_rj = merge->buf_rj; 2921 2922 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2923 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2924 len_s = merge->len_s; 2925 2926 /* send and recv matrix values */ 2927 /*-----------------------------*/ 2928 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2929 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2930 2931 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2932 for (proc=0,k=0; proc<size; proc++){ 2933 if (!len_s[proc]) continue; 2934 i = owners[proc]; 2935 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2936 k++; 2937 } 2938 2939 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2940 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2941 ierr = PetscFree(status);CHKERRQ(ierr); 2942 2943 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2944 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2945 2946 /* insert mat values of mpimat */ 2947 /*----------------------------*/ 2948 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2949 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2950 nextrow = buf_ri_k + merge->nrecv; 2951 nextai = nextrow + merge->nrecv; 2952 2953 for (k=0; k<merge->nrecv; k++){ 2954 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2955 nrows = *(buf_ri_k[k]); 2956 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2957 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2958 } 2959 2960 /* set values of ba */ 2961 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2962 for (i=0; i<m; i++) { 2963 arow = owners[rank] + i; 2964 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2965 bnzi = bi[i+1] - bi[i]; 2966 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2967 2968 /* add local non-zero vals of this proc's seqmat into ba */ 2969 anzi = ai[arow+1] - ai[arow]; 2970 aj = a->j + ai[arow]; 2971 aa = a->a + ai[arow]; 2972 nextaj = 0; 2973 for 
(j=0; nextaj<anzi; j++){ 2974 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2975 ba_i[j] += aa[nextaj++]; 2976 } 2977 } 2978 2979 /* add received vals into ba */ 2980 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2981 /* i-th row */ 2982 if (i == *nextrow[k]) { 2983 anzi = *(nextai[k]+1) - *nextai[k]; 2984 aj = buf_rj[k] + *(nextai[k]); 2985 aa = abuf_r[k] + *(nextai[k]); 2986 nextaj = 0; 2987 for (j=0; nextaj<anzi; j++){ 2988 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2989 ba_i[j] += aa[nextaj++]; 2990 } 2991 } 2992 nextrow[k]++; nextai[k]++; 2993 } 2994 } 2995 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2996 } 2997 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2998 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 3000 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3001 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3002 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3003 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3004 PetscFunctionReturn(0); 3005 } 3006 static PetscEvent logkey_seqstompisym = 0; 3007 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3008 { 3009 PetscErrorCode ierr; 3010 Mat B_mpi; 3011 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3012 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3013 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3014 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3015 PetscInt len,proc,*dnz,*onz; 3016 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3017 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3018 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3019 MPI_Status *status; 3020 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3021 PetscBT lnkbt; 3022 Mat_Merge_SeqsToMPI *merge; 3023 PetscObjectContainer container; 3024 3025 PetscFunctionBegin; 3026 if (!logkey_seqstompisym) { 3027 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3028 } 3029 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3030 3031 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3032 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3033 3034 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3035 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3036 3037 /* determine row ownership */ 3038 /*---------------------------------------------------------*/ 3039 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3040 if (m == PETSC_DECIDE) { 3041 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3042 } else { 3043 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3044 } 3045 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3046 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3047 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3048 3049 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3050 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3051 3052 /* determine the number of messages to send, their lengths */ 3053 /*---------------------------------------------------------*/ 3054 len_s = merge->len_s; 3055 3056 len = 0; /* length of buf_si[] */ 3057 merge->nsend = 0; 3058 for (proc=0; proc<size; proc++){ 3059 len_si[proc] = 0; 3060 if (proc == rank){ 3061 len_s[proc] = 0; 3062 } else { 3063 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3064 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3065 } 3066 if (len_s[proc]) { 3067 merge->nsend++; 3068 nrows = 0; 3069 for (i=owners[proc]; i<owners[proc+1]; i++){ 3070 if (ai[i+1] > ai[i]) nrows++; 3071 } 3072 len_si[proc] = 2*(nrows+1); 3073 len += len_si[proc]; 3074 } 3075 } 3076 3077 /* determine the number and length of messages to receive for ij-structure */ 3078 /*-------------------------------------------------------------------------*/ 3079 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3080 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3081 3082 /* post the Irecv of j-structure */ 3083 /*-------------------------------*/ 3084 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3085 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3086 3087 /* post the Isend of j-structure */ 3088 /*--------------------------------*/ 3089 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3090 sj_waits = si_waits + merge->nsend; 3091 3092 for (proc=0, k=0; proc<size; proc++){ 3093 if (!len_s[proc]) continue; 3094 i = owners[proc]; 3095 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3096 k++; 3097 } 3098 3099 /* receives and sends of j-structure are complete */ 3100 /*------------------------------------------------*/ 3101 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3102 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3103 3104 /* send and recv i-structure */ 3105 /*---------------------------*/ 3106 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3107 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3108 3109 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3110 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3111 for (proc=0,k=0; proc<size; proc++){ 3112 if (!len_s[proc]) continue; 3113 /* form outgoing message for i-structure: 3114 buf_si[0]: nrows to be sent 3115 [1:nrows]: row index (global) 3116 [nrows+1:2*nrows+1]: i-structure index 3117 */ 3118 /*-------------------------------------------*/ 3119 nrows = len_si[proc]/2 - 1; 3120 buf_si_i = buf_si + nrows+1; 3121 buf_si[0] = nrows; 3122 buf_si_i[0] = 0; 3123 nrows = 0; 3124 for (i=owners[proc]; i<owners[proc+1]; i++){ 3125 anzi = ai[i+1] - ai[i]; 3126 if (anzi) { 3127 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3128 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3129 nrows++; 3130 } 3131 } 3132 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3133 k++; 3134 buf_si += len_si[proc]; 3135 } 3136 3137 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3138 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3139 3140 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3141 for (i=0; i<merge->nrecv; i++){ 3142 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3143 } 3144 3145 ierr = PetscFree(len_si);CHKERRQ(ierr); 3146 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3147 ierr = 
PetscFree(rj_waits);CHKERRQ(ierr); 3148 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3149 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3150 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3151 ierr = PetscFree(status);CHKERRQ(ierr); 3152 3153 /* compute a local seq matrix in each processor */ 3154 /*----------------------------------------------*/ 3155 /* allocate bi array and free space for accumulating nonzero column info */ 3156 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3157 bi[0] = 0; 3158 3159 /* create and initialize a linked list */ 3160 nlnk = N+1; 3161 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3162 3163 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3164 len = 0; 3165 len = ai[owners[rank+1]] - ai[owners[rank]]; 3166 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3167 current_space = free_space; 3168 3169 /* determine symbolic info for each local row */ 3170 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3171 nextrow = buf_ri_k + merge->nrecv; 3172 nextai = nextrow + merge->nrecv; 3173 for (k=0; k<merge->nrecv; k++){ 3174 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3175 nrows = *buf_ri_k[k]; 3176 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3177 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3178 } 3179 3180 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3181 len = 0; 3182 for (i=0;i<m;i++) { 3183 bnzi = 0; 3184 /* add local non-zero cols of this proc's seqmat into lnk */ 3185 arow = owners[rank] + i; 3186 anzi = ai[arow+1] - ai[arow]; 3187 aj = a->j + ai[arow]; 3188 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3189 bnzi += nlnk; 3190 /* add received col data into lnk */ 3191 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3192 if (i == *nextrow[k]) { /* i-th row */ 3193 anzi = *(nextai[k]+1) - *nextai[k]; 3194 aj = buf_rj[k] + *nextai[k]; 3195 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3196 bnzi += nlnk; 3197 nextrow[k]++; nextai[k]++; 3198 } 3199 } 3200 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3201 3202 /* if free space is not available, make more free space */ 3203 if (current_space->local_remaining<bnzi) { 3204 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3205 nspacedouble++; 3206 } 3207 /* copy data into free space, then initialize lnk */ 3208 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3209 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3210 3211 current_space->array += bnzi; 3212 current_space->local_used += bnzi; 3213 current_space->local_remaining -= bnzi; 3214 3215 bi[i+1] = bi[i] + bnzi; 3216 } 3217 3218 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3219 3220 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3221 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3222 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3223 3224 /* create symbolic parallel matrix B_mpi */ 3225 /*---------------------------------------*/ 3226 if (n==PETSC_DECIDE) { 3227 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3228 } else { 3229 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3230 } 3231 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3232 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3233 ierr = 
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3234 3235 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3236 B_mpi->assembled = PETSC_FALSE; 3237 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3238 merge->bi = bi; 3239 merge->bj = bj; 3240 merge->buf_ri = buf_ri; 3241 merge->buf_rj = buf_rj; 3242 merge->C_seq = seqmat; 3243 merge->ci = PETSC_NULL; 3244 merge->cj = PETSC_NULL; 3245 3246 /* attach the supporting struct to B_mpi for reuse */ 3247 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3248 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3249 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3250 *mpimat = B_mpi; 3251 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3252 PetscFunctionReturn(0); 3253 } 3254 3255 static PetscEvent logkey_seqstompi = 0; 3256 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3257 { 3258 PetscErrorCode ierr; 3259 3260 PetscFunctionBegin; 3261 if (!logkey_seqstompi) { 3262 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3263 } 3264 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3265 if (scall == MAT_INITIAL_MATRIX){ 3266 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3267 } 3268 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3269 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3270 PetscFunctionReturn(0); 3271 } 3272 static PetscEvent logkey_getlocalmat = 0; 3273 #undef __FUNCT__ 3274 #define __FUNCT__ "MatGetLocalMat" 3275 /*@C 3276 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3277 3278 Not Collective 3279 3280 Input Parameters: 3281 + A - the matrix 3282 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3283 3284 Output Parameter: 3285 . 
A_loc - the local sequential matrix generated 3286 3287 Level: developer 3288 3289 @*/ 3290 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3291 { 3292 PetscErrorCode ierr; 3293 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3294 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3295 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3296 MatScalar *aa=a->a,*ba=b->a; 3297 PetscInt am=A->m,i,j,*nnz,nnz_max,ncols,*cols,cstart=mpimat->cstart; 3298 Mat Aw; 3299 3300 PetscFunctionBegin; 3301 if (!logkey_getlocalmat) { 3302 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3303 } 3304 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3305 if (scall == MAT_INITIAL_MATRIX){ 3306 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3307 nnz_max = 0; 3308 for (i=0; i<am; i++){ 3309 nnz[i] = (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3310 if (nnz[i] > nnz_max) nnz_max = nnz[i]; 3311 } 3312 ierr = MatCreate(PETSC_COMM_SELF,am,A->N,am,A->N,&Aw);CHKERRQ(ierr); 3313 ierr = MatSetType(Aw,MATSEQAIJ);CHKERRQ(ierr); 3314 ierr = MatSeqAIJSetPreallocation(Aw,0,nnz);CHKERRQ(ierr); 3315 } else if (scall == MAT_REUSE_MATRIX){ 3316 Aw = *A_loc; 3317 mat=(Mat_SeqAIJ*)Aw->data; 3318 nnz_max = 0; 3319 for (i=0; i<am; i++){ 3320 j = mat->i[i+1] - mat->i[i]; 3321 if (j > nnz_max) nnz_max = j; 3322 } 3323 /* clear old values in *A_loc */ 3324 ierr = PetscMemzero(mat->a,mat->i[am]*sizeof(MatScalar));CHKERRQ(ierr); 3325 } else { 3326 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3327 } 3328 ierr = PetscMalloc((1+nnz_max)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3329 3330 for (i=0; i<am; i++) { 3331 /* diagonal portion of A */ 3332 ncols = ai[i+1] - ai[i]; 3333 for (j=0; j<ncols; j++) {cols[j] = cstart + *aj++;} 3334 ierr = MatSetValues(Aw,1,&i,ncols,cols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 3335 /* off-diagonal portion of A */ 3336 ncols = bi[i+1] - bi[i]; 3337 for (j=0; j<ncols; j++) cols[j] = cmap[*bj++]; /* cmap[*(bj+bi[i]+j)]; */ 3338 ierr = MatSetValues(Aw,1,&i,ncols,cols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 3339 } 3340 ierr = MatAssemblyBegin(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3341 ierr = MatAssemblyEnd(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3342 ierr = PetscFree(cols);CHKERRQ(ierr); 3343 if (scall == MAT_INITIAL_MATRIX){ 3344 ierr = PetscFree(nnz);CHKERRQ(ierr); 3345 *A_loc = Aw; 3346 } 3347 3348 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3349 PetscFunctionReturn(0); 3350 } 3351 3352 static PetscEvent logkey_getlocalmatcondensed = 0; 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatGetLocalMatCondensed" 3355 /*@C 3356 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3357 3358 Not Collective 3359 3360 Input Parameters: 3361 + A - the matrix 3362 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3363 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3364 3365 Output Parameter: 3366 . 
A_loc - the local sequential matrix generated 3367 3368 Level: developer 3369 3370 @*/ 3371 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3372 { 3373 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3374 PetscErrorCode ierr; 3375 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3376 IS isrowa,iscola; 3377 Mat *aloc; 3378 3379 PetscFunctionBegin; 3380 if (!logkey_getlocalmatcondensed) { 3381 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3382 } 3383 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3384 if (!row){ 3385 start = a->rstart; end = a->rend; 3386 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3387 } else { 3388 isrowa = *row; 3389 } 3390 if (!col){ 3391 start = a->cstart; 3392 cmap = a->garray; 3393 nzA = a->A->n; 3394 nzB = a->B->n; 3395 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3396 ncols = 0; 3397 for (i=0; i<nzB; i++) { 3398 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3399 else break; 3400 } 3401 imark = i; 3402 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3403 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3404 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3405 ierr = PetscFree(idx);CHKERRQ(ierr); 3406 } else { 3407 iscola = *col; 3408 } 3409 if (scall != MAT_INITIAL_MATRIX){ 3410 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3411 aloc[0] = *A_loc; 3412 } 3413 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3414 *A_loc = aloc[0]; 3415 ierr = PetscFree(aloc);CHKERRQ(ierr); 3416 if (!row){ 3417 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3418 } 3419 if (!col){ 3420 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3421 } 3422 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3423 PetscFunctionReturn(0); 3424 } 3425 3426 static PetscEvent logkey_GetBrowsOfAcols = 0; 3427 #undef __FUNCT__ 3428 #define __FUNCT__ "MatGetBrowsOfAcols" 3429 /*@C 3430 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3431 3432 Collective on Mat 3433 3434 Input Parameters: 3435 + A,B - the matrices in mpiaij format 3436 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3437 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3438 3439 Output Parameter: 3440 + rowb, colb - index sets of rows and columns of B to extract 3441 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3442 - B_seq - the sequential matrix generated 3443 3444 Level: developer 3445 3446 @*/ 3447 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3448 { 3449 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3450 PetscErrorCode ierr; 3451 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3452 IS isrowb,iscolb; 3453 Mat *bseq; 3454 3455 PetscFunctionBegin; 3456 if (a->cstart != b->rstart || a->cend != b->rend){ 3457 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3458 } 3459 if (!logkey_GetBrowsOfAcols) { 3460 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3461 } 3462 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3463 3464 if (scall == MAT_INITIAL_MATRIX){ 3465 start = a->cstart; 3466 cmap = a->garray; 3467 nzA = a->A->n; 3468 nzB = a->B->n; 3469 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3470 ncols = 0; 3471 for (i=0; i<nzB; i++) { /* row < local row index */ 3472 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3473 else break; 3474 } 3475 imark = i; 3476 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3477 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3478 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3479 ierr = PetscFree(idx);CHKERRQ(ierr); 3480 *brstart = imark; 3481 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3482 } else { 3483 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3484 isrowb = *rowb; iscolb = *colb; 3485 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3486 bseq[0] = *B_seq; 3487 } 3488 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3489 *B_seq = bseq[0]; 3490 ierr = PetscFree(bseq);CHKERRQ(ierr); 3491 if (!rowb){ 3492 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3493 } else { 3494 *rowb = isrowb; 3495 } 3496 if (!colb){ 3497 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3498 } else { 3499 *colb = iscolb; 3500 } 3501 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3502 PetscFunctionReturn(0); 3503 } 3504 3505 static PetscEvent logkey_GetBrowsOfAocols = 0; 3506 #undef __FUNCT__ 3507 #define __FUNCT__ "MatGetBrowsOfAoCols" 3508 /*@C 3509 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3510 of the OFF-DIAGONAL portion of local A 3511 3512 Collective on Mat 3513 3514 Input Parameters: 3515 + A,B - the matrices in mpiaij format 3516 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3517 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3518 3519 Output Parameter: 3520 + rowb, colb - index sets of rows and columns of B to extract 3521 . brstart - row index of B_seq from which the remaining rows correspond to A's columns that are greater than cend. 
3522 - B_seq - the sequential matrix generated 3523 3524 Level: developer 3525 3526 @*/ 3527 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3528 { 3529 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3530 PetscErrorCode ierr; 3531 PetscInt *idx,i,start,ncols,nzB,*cmap; 3532 IS isrowb,iscolb; 3533 Mat *bseq; 3534 3535 PetscFunctionBegin; 3536 if (a->cstart != b->rstart || a->cend != b->rend){ 3537 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3538 } 3539 if (!logkey_GetBrowsOfAocols) { 3540 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrowsOfAoCols",MAT_COOKIE); 3541 } 3542 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3543 3544 if (scall == MAT_INITIAL_MATRIX){ 3545 start = a->cstart; 3546 cmap = a->garray; 3547 nzB = a->B->n; 3548 if (!nzB){ /* output a null matrix */ 3549 B_seq = PETSC_NULL; 3550 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3551 PetscFunctionReturn(0); 3552 } 3553 ierr = PetscMalloc(nzB*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3554 ncols = 0; 3555 *brstart = nzB; 3556 for (i=0; i<nzB; i++) { /* row < local row index */ 3557 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3558 else break; 3559 } 3560 *brstart = i; 3561 for (i=*brstart; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3562 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3563 ierr = PetscFree(idx);CHKERRQ(ierr); 3564 3565 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3566 } else { 3567 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3568 isrowb = *rowb; iscolb = *colb; 3569 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3570 bseq[0] = *B_seq; 3571 } 3572 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3573 *B_seq = bseq[0]; 3574 ierr = PetscFree(bseq);CHKERRQ(ierr); 3575 if (!rowb){ 3576 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3577 } else { 3578 *rowb = isrowb; 3579 } 3580 if (!colb){ 3581 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3582 } else { 3583 *colb = iscolb; 3584 } 3585 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3586 PetscFunctionReturn(0); 3587 } 3588 3589 /*MC 3590 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3591 3592 Options Database Keys: 3593 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3594 3595 Level: beginner 3596 3597 .seealso: MatCreateMPIAIJ 3598 M*/ 3599 3600 EXTERN_C_BEGIN 3601 #undef __FUNCT__ 3602 #define __FUNCT__ "MatCreate_MPIAIJ" 3603 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3604 { 3605 Mat_MPIAIJ *b; 3606 PetscErrorCode ierr; 3607 PetscInt i; 3608 PetscMPIInt size; 3609 3610 PetscFunctionBegin; 3611 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3612 3613 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3614 B->data = (void*)b; 3615 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3616 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3617 B->factor = 0; 3618 B->assembled = PETSC_FALSE; 3619 B->mapping = 0; 3620 3621 B->insertmode = NOT_SET_VALUES; 3622 b->size = size; 3623 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3624 3625 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3626 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3627 3628 /* the information in the maps duplicates the information computed below, eventually 3629 we should remove the duplicate information that is not contained in the maps */ 3630 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3631 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3632 3633 /* build local table of row and column ownerships */ 3634 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3635 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3636 b->cowners = b->rowners + b->size + 2; 3637 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3638 b->rowners[0] = 0; 3639 for (i=2; i<=b->size; i++) { 3640 b->rowners[i] += b->rowners[i-1]; 3641 } 3642 b->rstart = b->rowners[b->rank]; 3643 b->rend = b->rowners[b->rank+1]; 3644 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3645 b->cowners[0] = 0; 3646 for (i=2; i<=b->size; i++) { 3647 b->cowners[i] += b->cowners[i-1]; 3648 } 3649 b->cstart = b->cowners[b->rank]; 3650 b->cend = b->cowners[b->rank+1]; 3651 3652 /* build cache for off array entries formed */ 3653 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3654 b->donotstash = PETSC_FALSE; 3655 b->colmap = 0; 3656 b->garray = 0; 3657 b->roworiented = PETSC_TRUE; 3658 3659 /* stuff used for matrix vector multiply */ 3660 b->lvec = PETSC_NULL; 3661 b->Mvctx = PETSC_NULL; 3662 3663 /* stuff for MatGetRow() */ 3664 b->rowindices = 0; 3665 b->rowvalues = 0; 3666 b->getrowactive = PETSC_FALSE; 3667 3668 /* Explicitly create 2 MATSEQAIJ matrices. 
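        A is the diagonal block: the locally owned rows by the locally owned
        columns.  B is the off-diagonal block; it is created here with the full
        global column dimension N, and MatAssemblyEnd() later compresses it to
        its nonzero columns, recording the local-to-global column map in garray.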
*/ 3669 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3670 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3671 PetscLogObjectParent(B,b->A); 3672 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3673 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3674 PetscLogObjectParent(B,b->B); 3675 3676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3677 "MatStoreValues_MPIAIJ", 3678 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3679 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3680 "MatRetrieveValues_MPIAIJ", 3681 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3682 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3683 "MatGetDiagonalBlock_MPIAIJ", 3684 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3685 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3686 "MatIsTranspose_MPIAIJ", 3687 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3688 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3689 "MatMPIAIJSetPreallocation_MPIAIJ", 3690 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3691 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3692 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3693 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3694 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3695 "MatDiagonalScaleLocal_MPIAIJ", 3696 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3697 PetscFunctionReturn(0); 3698 } 3699 EXTERN_C_END 3700
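
/*
   Usage sketch for the merge routines above (illustrative only, not part of
   the library source).  Assumptions: every process holds a sequential
   MATSEQAIJ matrix seqmat whose row dimension equals the global row dimension
   of the desired parallel matrix; row i of seqmat carries that process's
   contribution to global row i, and contributions from different processes to
   the same entry are summed.

      Mat seqmat,C;

      .. each process assembles its own seqmat ..

      .. first call: builds the parallel structure (symbolic phase) and fills
         in the values (numeric phase) ..
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);

      .. later, after changing only the numerical values of seqmat (same
         nonzero pattern), refill C without redoing the symbolic work ..
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   The first call attaches the supporting Mat_Merge_SeqsToMPI structure to C in
   the "MatMergeSeqsToMPI" container; the MAT_REUSE_MATRIX call skips
   MatMerge_SeqsToMPISymbolic() and only runs MatMerge_SeqsToMPINumeric(),
   which retrieves that container with PetscObjectQuery().
*/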