#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor holds an order-N integer array), but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
 \
    if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (PetscInt*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
    for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                       len*sizeof(PetscInt));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                       len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
 \
    ierr = PetscFree(a->a);CHKERRQ(ierr); \
    if (!a->singlemalloc) { \
      ierr = PetscFree(a->i);CHKERRQ(ierr); \
      ierr = PetscFree(a->j);CHKERRQ(ierr); \
    } \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = PETSC_TRUE; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (PetscInt*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(PetscInt));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 PetscInt i,j,rstart = aij->rstart,rend = 
aij->rend; 192 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 PetscInt nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 278 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 PetscInt nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 PetscMPIInt n; 351 PetscInt i,j,rstart,ncols,flg; 352 PetscInt *row,*col,other_disassembled; 353 PetscScalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for (i=0; i<n;) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. 
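       The reduction below takes the MPI_PROD of was_assembled across all ranks: the
       product is nonzero only if every rank still has its off-diagonal block assembled,
       so if any rank has disassembled, the ranks that are still assembled call
       DisAssemble_MPIAIJ() as well so that the subsequent reassembly is consistent.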
*/ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 /* reaccess the b because aij->B was changed */ 389 b = (Mat_SeqAIJ *)aij->B->data; 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 404 /* used by MatAXPY() */ 405 a->xtoy = 0; b->xtoy = 0; 406 a->XtoY = 0; b->XtoY = 0; 407 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 414 { 415 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 416 PetscErrorCode ierr; 417 418 PetscFunctionBegin; 419 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 420 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatZeroRows_MPIAIJ" 426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 427 { 428 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 429 PetscErrorCode ierr; 430 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 431 PetscInt i,N,*rows,*owners = l->rowners; 432 PetscInt *nprocs,j,idx,nsends,row; 433 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 434 PetscInt *rvalues,count,base,slen,*source; 435 PetscInt *lens,*lrows,*values,rstart=l->rstart; 436 MPI_Comm comm = A->comm; 437 MPI_Request *send_waits,*recv_waits; 438 MPI_Status recv_status,*send_status; 439 IS istmp; 440 PetscTruth found; 441 442 PetscFunctionBegin; 443 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 445 446 /* first count number of contributors to each processor */ 447 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 448 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 450 for (i=0; i<N; i++) { 451 idx = rows[i]; 452 found = PETSC_FALSE; 453 for (j=0; j<size; j++) { 454 if (idx >= owners[j] && idx < owners[j+1]) { 455 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 456 } 457 } 458 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 459 } 460 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 461 462 /* inform other processors of number of messages and max length*/ 463 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 464 465 /* post receives: */ 466 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 467 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 468 for (i=0; i<nrecvs; i++) { 469 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 470 } 471 472 /* do sends: 473 1) starts[i] gives the starting index in svalues for stuff going to 474 the ith processor 475 */ 476 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 477 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 478 
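  /*
     Communication pattern: nprocs[] recorded how many of the requested rows belong to
     each processor, PetscMaxSum() told every processor how many messages it will receive
     (nrecvs) and the length of the longest one (nmax), and the receives above were posted
     with MPI_ANY_SOURCE. Below, the row indices are packed into svalues[] grouped by
     destination processor using starts[], and one MPI_Isend() is issued for each
     destination that actually receives data.
  */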
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called once per diagonal entry,
     which would allocate new memory and cause many mallocs, slowing things down.
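     If the diagonal block is rectangular and a diagonal value is requested, the rows
     are zeroed first and the diagonal entries are then inserted one at a time with
     MatSetValues(), after which the matrix is reassembled.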
536 537 Contributed by: Mathew Knepley 538 */ 539 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 540 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 541 if (diag && (l->A->M == l->A->N)) { 542 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 543 } else if (diag) { 544 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 545 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 546 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 548 } 549 for (i = 0; i < slen; i++) { 550 row = lrows[i] + rstart; 551 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 } else { 556 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 557 } 558 ierr = ISDestroy(istmp);CHKERRQ(ierr); 559 ierr = PetscFree(lrows);CHKERRQ(ierr); 560 561 /* wait on sends */ 562 if (nsends) { 563 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 564 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 565 ierr = PetscFree(send_status);CHKERRQ(ierr); 566 } 567 ierr = PetscFree(send_waits);CHKERRQ(ierr); 568 ierr = PetscFree(svalues);CHKERRQ(ierr); 569 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatMult_MPIAIJ" 575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 576 { 577 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 578 PetscErrorCode ierr; 579 PetscInt nt; 580 581 PetscFunctionBegin; 582 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 583 if (nt != A->n) { 584 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 585 } 586 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 588 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 589 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatMultAdd_MPIAIJ" 595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 598 PetscErrorCode ierr; 599 600 PetscFunctionBegin; 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 /* do nondiagonal part */ 617 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 618 /* send it on its way */ 619 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 620 /* do local part */ 621 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 622 /* receive remote parts: note this assumes the values are not actually */ 623 /* inserted in yy until the next line, which is true for my implementation*/ 624 /* but is not perhaps always true. 
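     In the current implementation VecScatterBegin() only initiates the communication;
     the remote contributions are added into yy by VecScatterEnd() below, after the
     local product has already been written into yy, so nothing is overwritten.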
*/ 625 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 EXTERN_C_BEGIN 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 633 { 634 MPI_Comm comm; 635 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 636 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 637 IS Me,Notme; 638 PetscErrorCode ierr; 639 PetscInt M,N,first,last,*notme,i; 640 PetscMPIInt size; 641 642 PetscFunctionBegin; 643 644 /* Easy test: symmetric diagonal block */ 645 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 646 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 647 if (!*f) PetscFunctionReturn(0); 648 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 649 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 650 if (size == 1) PetscFunctionReturn(0); 651 652 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 653 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 654 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 655 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 656 for (i=0; i<first; i++) notme[i] = i; 657 for (i=last; i<M; i++) notme[i-last+first] = i; 658 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 659 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 660 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 661 Aoff = Aoffs[0]; 662 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 663 Boff = Boffs[0]; 664 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 665 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 666 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 667 ierr = ISDestroy(Me);CHKERRQ(ierr); 668 ierr = ISDestroy(Notme);CHKERRQ(ierr); 669 670 PetscFunctionReturn(0); 671 } 672 EXTERN_C_END 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 679 PetscErrorCode ierr; 680 681 PetscFunctionBegin; 682 /* do nondiagonal part */ 683 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 684 /* send it on its way */ 685 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 686 /* do local part */ 687 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 688 /* receive remote parts: note this assumes the values are not actually */ 689 /* inserted in yy until the next line, which is true for my implementation*/ 690 /* but is not perhaps always true. 
*/ 691 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 /* 696 This only works correctly for square matrices where the subblock A->A is the 697 diagonal block 698 */ 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 702 { 703 PetscErrorCode ierr; 704 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 705 706 PetscFunctionBegin; 707 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 708 if (a->rstart != a->cstart || a->rend != a->cend) { 709 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 710 } 711 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatScale_MPIAIJ" 717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 720 PetscErrorCode ierr; 721 722 PetscFunctionBegin; 723 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 724 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatDestroy_MPIAIJ" 730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 731 { 732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 #if defined(PETSC_USE_LOG) 737 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 738 #endif 739 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 740 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 741 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 742 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 743 #if defined (PETSC_USE_CTABLE) 744 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 745 #else 746 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 747 #endif 748 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 749 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 750 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 751 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 752 ierr = PetscFree(aij);CHKERRQ(ierr); 753 754 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 755 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 756 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 757 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatView_MPIAIJ_Binary" 766 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 767 { 768 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 769 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 770 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 771 PetscErrorCode ierr; 772 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 773 int fd; 774 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 775 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 776 PetscScalar 
*column_values; 777 778 PetscFunctionBegin; 779 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 780 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 781 nz = A->nz + B->nz; 782 if (!rank) { 783 header[0] = MAT_FILE_COOKIE; 784 header[1] = mat->M; 785 header[2] = mat->N; 786 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 787 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 788 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 789 /* get largest number of rows any processor has */ 790 rlen = mat->m; 791 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 792 for (i=1; i<size; i++) { 793 rlen = PetscMax(rlen,range[i+1] - range[i]); 794 } 795 } else { 796 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 797 rlen = mat->m; 798 } 799 800 /* load up the local row counts */ 801 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 802 for (i=0; i<mat->m; i++) { 803 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 804 } 805 806 /* store the row lengths to the file */ 807 if (!rank) { 808 MPI_Status status; 809 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 810 for (i=1; i<size; i++) { 811 rlen = range[i+1] - range[i]; 812 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 813 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 814 } 815 } else { 816 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 817 } 818 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 819 820 /* load up the local column indices */ 821 nzmax = nz; /* )th processor needs space a largest processor needs */ 822 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 823 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 824 cnt = 0; 825 for (i=0; i<mat->m; i++) { 826 for (j=B->i[i]; j<B->i[i+1]; j++) { 827 if ( (col = garray[B->j[j]]) > cstart) break; 828 column_indices[cnt++] = col; 829 } 830 for (k=A->i[i]; k<A->i[i+1]; k++) { 831 column_indices[cnt++] = A->j[k] + cstart; 832 } 833 for (; j<B->i[i+1]; j++) { 834 column_indices[cnt++] = garray[B->j[j]]; 835 } 836 } 837 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 838 839 /* store the column indices to the file */ 840 if (!rank) { 841 MPI_Status status; 842 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 843 for (i=1; i<size; i++) { 844 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 845 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 846 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 847 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 848 } 849 } else { 850 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 851 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 852 } 853 ierr = PetscFree(column_indices);CHKERRQ(ierr); 854 855 /* load up the local column values */ 856 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 857 cnt = 0; 858 for (i=0; i<mat->m; i++) { 859 for (j=B->i[i]; j<B->i[i+1]; j++) { 860 if ( garray[B->j[j]] > cstart) break; 861 column_values[cnt++] = B->a[j]; 862 } 863 for (k=A->i[i]; k<A->i[i+1]; k++) { 864 column_values[cnt++] = A->a[k]; 865 } 866 for (; 
j<B->i[i+1]; j++) { 867 column_values[cnt++] = B->a[j]; 868 } 869 } 870 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 871 872 /* store the column values to the file */ 873 if (!rank) { 874 MPI_Status status; 875 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 876 for (i=1; i<size; i++) { 877 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 878 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 879 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 880 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 881 } 882 } else { 883 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 884 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 885 } 886 ierr = PetscFree(column_values);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 892 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 893 { 894 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 895 PetscErrorCode ierr; 896 PetscMPIInt rank = aij->rank,size = aij->size; 897 PetscTruth isdraw,iascii,flg,isbinary; 898 PetscViewer sviewer; 899 PetscViewerFormat format; 900 901 PetscFunctionBegin; 902 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 903 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 904 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 905 if (iascii) { 906 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 907 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 908 MatInfo info; 909 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 910 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 911 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 912 if (flg) { 913 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 914 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 915 } else { 916 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 917 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 918 } 919 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 920 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 921 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 922 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 923 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 924 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } else if (format == PETSC_VIEWER_ASCII_INFO) { 927 PetscFunctionReturn(0); 928 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 929 PetscFunctionReturn(0); 930 } 931 } else if (isbinary) { 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 937 } 938 PetscFunctionReturn(0); 939 } else if (isdraw) { 940 
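    /* if the draw device is the null device there is nothing to render, so return
       immediately below; otherwise fall through, and in parallel the matrix is
       gathered onto the first processor further down before being drawn */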
PetscDraw draw; 941 PetscTruth isnull; 942 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 943 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 944 } 945 946 if (size == 1) { 947 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 948 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 949 } else { 950 /* assemble the entire matrix onto first processor. */ 951 Mat A; 952 Mat_SeqAIJ *Aloc; 953 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 954 PetscScalar *a; 955 956 if (!rank) { 957 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 958 } else { 959 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 960 } 961 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 962 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 963 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 964 PetscLogObjectParent(mat,A); 965 966 /* copy over the A part */ 967 Aloc = (Mat_SeqAIJ*)aij->A->data; 968 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 969 row = aij->rstart; 970 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 974 } 975 aj = Aloc->j; 976 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 977 978 /* copy over the B part */ 979 Aloc = (Mat_SeqAIJ*)aij->B->data; 980 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 981 row = aij->rstart; 982 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 983 ct = cols; 984 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 985 for (i=0; i<m; i++) { 986 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 987 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 988 } 989 ierr = PetscFree(ct);CHKERRQ(ierr); 990 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 992 /* 993 Everyone has to call to draw the matrix since the graphics waits are 994 synchronized across all processors that share the PetscDraw object 995 */ 996 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 997 if (!rank) { 998 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 999 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1000 } 1001 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1002 ierr = MatDestroy(A);CHKERRQ(ierr); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatView_MPIAIJ" 1009 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1010 { 1011 PetscErrorCode ierr; 1012 PetscTruth iascii,isdraw,issocket,isbinary; 1013 1014 PetscFunctionBegin; 1015 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1016 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1017 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1018 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1019 if (iascii || isdraw || isbinary || issocket) { 1020 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1021 } else { 1022 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 1028 1029 #undef 
__FUNCT__ 1030 #define __FUNCT__ "MatRelax_MPIAIJ" 1031 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1032 { 1033 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1034 PetscErrorCode ierr; 1035 Vec bb1; 1036 PetscScalar mone=-1.0; 1037 1038 PetscFunctionBegin; 1039 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1040 1041 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1042 1043 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1044 if (flag & SOR_ZERO_INITIAL_GUESS) { 1045 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1046 its--; 1047 } 1048 1049 while (its--) { 1050 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1051 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1052 1053 /* update rhs: bb1 = bb - B*x */ 1054 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1055 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1056 1057 /* local sweep */ 1058 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1059 CHKERRQ(ierr); 1060 } 1061 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1062 if (flag & SOR_ZERO_INITIAL_GUESS) { 1063 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1064 its--; 1065 } 1066 while (its--) { 1067 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 1070 /* update rhs: bb1 = bb - B*x */ 1071 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1072 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1073 1074 /* local sweep */ 1075 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1076 CHKERRQ(ierr); 1077 } 1078 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1079 if (flag & SOR_ZERO_INITIAL_GUESS) { 1080 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1081 its--; 1082 } 1083 while (its--) { 1084 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1085 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1086 1087 /* update rhs: bb1 = bb - B*x */ 1088 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1089 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1090 1091 /* local sweep */ 1092 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1093 CHKERRQ(ierr); 1094 } 1095 } else { 1096 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1097 } 1098 1099 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1105 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1106 { 1107 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1108 Mat A = mat->A,B = mat->B; 1109 PetscErrorCode ierr; 1110 PetscReal isend[5],irecv[5]; 1111 1112 PetscFunctionBegin; 1113 info->block_size = 1.0; 1114 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1115 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1116 isend[3] = info->memory; isend[4] = info->mallocs; 1117 ierr = 
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1118 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1119 isend[3] += info->memory; isend[4] += info->mallocs; 1120 if (flag == MAT_LOCAL) { 1121 info->nz_used = isend[0]; 1122 info->nz_allocated = isend[1]; 1123 info->nz_unneeded = isend[2]; 1124 info->memory = isend[3]; 1125 info->mallocs = isend[4]; 1126 } else if (flag == MAT_GLOBAL_MAX) { 1127 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1128 info->nz_used = irecv[0]; 1129 info->nz_allocated = irecv[1]; 1130 info->nz_unneeded = irecv[2]; 1131 info->memory = irecv[3]; 1132 info->mallocs = irecv[4]; 1133 } else if (flag == MAT_GLOBAL_SUM) { 1134 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1135 info->nz_used = irecv[0]; 1136 info->nz_allocated = irecv[1]; 1137 info->nz_unneeded = irecv[2]; 1138 info->memory = irecv[3]; 1139 info->mallocs = irecv[4]; 1140 } 1141 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1142 info->fill_ratio_needed = 0; 1143 info->factor_mallocs = 0; 1144 info->rows_global = (double)matin->M; 1145 info->columns_global = (double)matin->N; 1146 info->rows_local = (double)matin->m; 1147 info->columns_local = (double)matin->N; 1148 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatSetOption_MPIAIJ" 1154 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1155 { 1156 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1157 PetscErrorCode ierr; 1158 1159 PetscFunctionBegin; 1160 switch (op) { 1161 case MAT_NO_NEW_NONZERO_LOCATIONS: 1162 case MAT_YES_NEW_NONZERO_LOCATIONS: 1163 case MAT_COLUMNS_UNSORTED: 1164 case MAT_COLUMNS_SORTED: 1165 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1166 case MAT_KEEP_ZEROED_ROWS: 1167 case MAT_NEW_NONZERO_LOCATION_ERR: 1168 case MAT_USE_INODES: 1169 case MAT_DO_NOT_USE_INODES: 1170 case MAT_IGNORE_ZERO_ENTRIES: 1171 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1172 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1173 break; 1174 case MAT_ROW_ORIENTED: 1175 a->roworiented = PETSC_TRUE; 1176 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1177 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1178 break; 1179 case MAT_ROWS_SORTED: 1180 case MAT_ROWS_UNSORTED: 1181 case MAT_YES_NEW_DIAGONALS: 1182 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1183 break; 1184 case MAT_COLUMN_ORIENTED: 1185 a->roworiented = PETSC_FALSE; 1186 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1187 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1188 break; 1189 case MAT_IGNORE_OFF_PROC_ENTRIES: 1190 a->donotstash = PETSC_TRUE; 1191 break; 1192 case MAT_NO_NEW_DIAGONALS: 1193 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1194 case MAT_SYMMETRIC: 1195 case MAT_STRUCTURALLY_SYMMETRIC: 1196 case MAT_HERMITIAN: 1197 case MAT_SYMMETRY_ETERNAL: 1198 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1199 break; 1200 case MAT_NOT_SYMMETRIC: 1201 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1202 case MAT_NOT_HERMITIAN: 1203 case MAT_NOT_SYMMETRY_ETERNAL: 1204 break; 1205 default: 1206 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1207 } 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatGetRow_MPIAIJ" 1213 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1214 { 1215 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1216 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1217 PetscErrorCode ierr; 1218 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1219 PetscInt 
nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1220 PetscInt *cmap,*idx_p; 1221 1222 PetscFunctionBegin; 1223 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1224 mat->getrowactive = PETSC_TRUE; 1225 1226 if (!mat->rowvalues && (idx || v)) { 1227 /* 1228 allocate enough space to hold information from the longest row. 1229 */ 1230 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1231 PetscInt max = 1,tmp; 1232 for (i=0; i<matin->m; i++) { 1233 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1234 if (max < tmp) { max = tmp; } 1235 } 1236 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1237 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1238 } 1239 1240 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1241 lrow = row - rstart; 1242 1243 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1244 if (!v) {pvA = 0; pvB = 0;} 1245 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1246 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1247 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1248 nztot = nzA + nzB; 1249 1250 cmap = mat->garray; 1251 if (v || idx) { 1252 if (nztot) { 1253 /* Sort by increasing column numbers, assuming A and B already sorted */ 1254 PetscInt imark = -1; 1255 if (v) { 1256 *v = v_p = mat->rowvalues; 1257 for (i=0; i<nzB; i++) { 1258 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1259 else break; 1260 } 1261 imark = i; 1262 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1263 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1264 } 1265 if (idx) { 1266 *idx = idx_p = mat->rowindices; 1267 if (imark > -1) { 1268 for (i=0; i<imark; i++) { 1269 idx_p[i] = cmap[cworkB[i]]; 1270 } 1271 } else { 1272 for (i=0; i<nzB; i++) { 1273 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1274 else break; 1275 } 1276 imark = i; 1277 } 1278 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1279 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1280 } 1281 } else { 1282 if (idx) *idx = 0; 1283 if (v) *v = 0; 1284 } 1285 } 1286 *nz = nztot; 1287 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1288 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1294 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1295 { 1296 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1297 1298 PetscFunctionBegin; 1299 if (aij->getrowactive == PETSC_FALSE) { 1300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1301 } 1302 aij->getrowactive = PETSC_FALSE; 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatNorm_MPIAIJ" 1308 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1309 { 1310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1311 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1312 PetscErrorCode ierr; 1313 PetscInt i,j,cstart = aij->cstart; 1314 PetscReal sum = 0.0; 1315 PetscScalar *v; 1316 1317 PetscFunctionBegin; 1318 if (aij->size == 1) { 1319 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1320 } else { 1321 if (type == NORM_FROBENIUS) { 1322 v = amat->a; 1323 for (i=0; i<amat->nz; i++) { 1324 #if defined(PETSC_USE_COMPLEX) 1325 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 
1326 #else 1327 sum += (*v)*(*v); v++; 1328 #endif 1329 } 1330 v = bmat->a; 1331 for (i=0; i<bmat->nz; i++) { 1332 #if defined(PETSC_USE_COMPLEX) 1333 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1334 #else 1335 sum += (*v)*(*v); v++; 1336 #endif 1337 } 1338 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1339 *norm = sqrt(*norm); 1340 } else if (type == NORM_1) { /* max column norm */ 1341 PetscReal *tmp,*tmp2; 1342 PetscInt *jj,*garray = aij->garray; 1343 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1344 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1345 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1346 *norm = 0.0; 1347 v = amat->a; jj = amat->j; 1348 for (j=0; j<amat->nz; j++) { 1349 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1350 } 1351 v = bmat->a; jj = bmat->j; 1352 for (j=0; j<bmat->nz; j++) { 1353 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1354 } 1355 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1356 for (j=0; j<mat->N; j++) { 1357 if (tmp2[j] > *norm) *norm = tmp2[j]; 1358 } 1359 ierr = PetscFree(tmp);CHKERRQ(ierr); 1360 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1361 } else if (type == NORM_INFINITY) { /* max row norm */ 1362 PetscReal ntemp = 0.0; 1363 for (j=0; j<aij->A->m; j++) { 1364 v = amat->a + amat->i[j]; 1365 sum = 0.0; 1366 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1367 sum += PetscAbsScalar(*v); v++; 1368 } 1369 v = bmat->a + bmat->i[j]; 1370 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1371 sum += PetscAbsScalar(*v); v++; 1372 } 1373 if (sum > ntemp) ntemp = sum; 1374 } 1375 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1376 } else { 1377 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1378 } 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatTranspose_MPIAIJ" 1385 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1386 { 1387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1388 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1389 PetscErrorCode ierr; 1390 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1391 Mat B; 1392 PetscScalar *array; 1393 1394 PetscFunctionBegin; 1395 if (!matout && M != N) { 1396 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1397 } 1398 1399 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1400 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1401 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1402 1403 /* copy over the A part */ 1404 Aloc = (Mat_SeqAIJ*)a->A->data; 1405 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1406 row = a->rstart; 1407 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1408 for (i=0; i<m; i++) { 1409 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1410 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1411 } 1412 aj = Aloc->j; 1413 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1414 1415 /* copy over the B part */ 1416 Aloc = (Mat_SeqAIJ*)a->B->data; 1417 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1418 row = a->rstart; 1419 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1420 ct = cols; 1421 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1422 for (i=0; i<m; i++) { 1423 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1424 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1425 } 1426 ierr = PetscFree(ct);CHKERRQ(ierr); 1427 ierr = 
MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 if (matout) { 1430 *matout = B; 1431 } else { 1432 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1433 } 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1439 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1440 { 1441 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1442 Mat a = aij->A,b = aij->B; 1443 PetscErrorCode ierr; 1444 PetscInt s1,s2,s3; 1445 1446 PetscFunctionBegin; 1447 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1448 if (rr) { 1449 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1450 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1451 /* Overlap communication with computation. */ 1452 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1453 } 1454 if (ll) { 1455 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1456 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1457 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1458 } 1459 /* scale the diagonal block */ 1460 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1461 1462 if (rr) { 1463 /* Do a scatter end and then right scale the off-diagonal block */ 1464 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1465 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1466 } 1467 1468 PetscFunctionReturn(0); 1469 } 1470 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1474 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1475 { 1476 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1477 PetscErrorCode ierr; 1478 1479 PetscFunctionBegin; 1480 if (!a->rank) { 1481 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1488 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,PetscInt *bs) 1489 { 1490 PetscFunctionBegin; 1491 *bs = 1; 1492 PetscFunctionReturn(0); 1493 } 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1496 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1497 { 1498 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 1501 PetscFunctionBegin; 1502 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatEqual_MPIAIJ" 1508 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1509 { 1510 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1511 Mat a,b,c,d; 1512 PetscTruth flg; 1513 PetscErrorCode ierr; 1514 1515 PetscFunctionBegin; 1516 a = matA->A; b = matA->B; 1517 c = matB->A; d = matB->B; 1518 1519 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1520 if (flg == PETSC_TRUE) { 1521 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1522 } 1523 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatCopy_MPIAIJ" 1529 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1530 { 1531 PetscErrorCode ierr; 1532 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1533 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1534 1535 PetscFunctionBegin; 1536 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1537 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1538 /* because of the column compression in the off-processor part of the matrix a->B, 1539 the number of columns in a->B and b->B may be different, hence we cannot call 1540 the MatCopy() directly on the two parts. If need be, we can provide a more 1541 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1542 then copying the submatrices */ 1543 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1544 } else { 1545 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1546 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1547 } 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1553 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1554 { 1555 PetscErrorCode ierr; 1556 1557 PetscFunctionBegin; 1558 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #include "petscblaslapack.h" 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatAXPY_MPIAIJ" 1565 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1566 { 1567 PetscErrorCode ierr; 1568 PetscInt i; 1569 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1570 PetscBLASInt bnz,one=1; 1571 Mat_SeqAIJ *x,*y; 1572 1573 PetscFunctionBegin; 1574 if (str == SAME_NONZERO_PATTERN) { 1575 x = (Mat_SeqAIJ *)xx->A->data; 1576 y = (Mat_SeqAIJ *)yy->A->data; 1577 bnz = (PetscBLASInt)x->nz; 1578 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1579 x = (Mat_SeqAIJ *)xx->B->data; 1580 y = (Mat_SeqAIJ *)yy->B->data; 1581 bnz = (PetscBLASInt)x->nz; 1582 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1583 } else if (str == SUBSET_NONZERO_PATTERN) { 1584 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1585 1586 x = (Mat_SeqAIJ *)xx->B->data; 1587 y = (Mat_SeqAIJ *)yy->B->data; 1588 if (y->xtoy && y->XtoY != xx->B) { 1589 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1590 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1591 } 1592 if (!y->xtoy) { /* get xtoy */ 1593 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1594 y->XtoY = xx->B; 1595 } 1596 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1597 } else { 1598 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 /* -------------------------------------------------------------------*/ 1604 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1605 MatGetRow_MPIAIJ, 1606 MatRestoreRow_MPIAIJ, 1607 MatMult_MPIAIJ, 1608 /* 4*/ MatMultAdd_MPIAIJ, 1609 MatMultTranspose_MPIAIJ, 1610 MatMultTransposeAdd_MPIAIJ, 1611 0, 1612 0, 1613 0, 1614 /*10*/ 0, 1615 0, 1616 0, 1617 MatRelax_MPIAIJ, 1618 MatTranspose_MPIAIJ, 1619 /*15*/ MatGetInfo_MPIAIJ, 1620 MatEqual_MPIAIJ, 1621 MatGetDiagonal_MPIAIJ, 1622 MatDiagonalScale_MPIAIJ, 1623 MatNorm_MPIAIJ, 1624 /*20*/ MatAssemblyBegin_MPIAIJ, 1625 MatAssemblyEnd_MPIAIJ, 1626 0, 1627 MatSetOption_MPIAIJ, 1628 MatZeroEntries_MPIAIJ, 1629 /*25*/ MatZeroRows_MPIAIJ, 1630 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_64BIT_INT) 1631 MatLUFactorSymbolic_MPIAIJ_TFS, 1632 #else 1633 0, 1634 #endif 1635 0, 1636 0, 1637 0, 1638 /*30*/ MatSetUpPreallocation_MPIAIJ, 1639 0, 1640 0, 1641 0, 1642 0, 1643 /*35*/ MatDuplicate_MPIAIJ, 1644 0, 1645 0, 1646 0, 1647 0, 1648 /*40*/ MatAXPY_MPIAIJ, 1649 MatGetSubMatrices_MPIAIJ, 1650 MatIncreaseOverlap_MPIAIJ, 1651 
MatGetValues_MPIAIJ, 1652 MatCopy_MPIAIJ, 1653 /*45*/ MatPrintHelp_MPIAIJ, 1654 MatScale_MPIAIJ, 1655 0, 1656 0, 1657 0, 1658 /*50*/ MatGetBlockSize_MPIAIJ, 1659 0, 1660 0, 1661 0, 1662 0, 1663 /*55*/ MatFDColoringCreate_MPIAIJ, 1664 0, 1665 MatSetUnfactored_MPIAIJ, 1666 0, 1667 0, 1668 /*60*/ MatGetSubMatrix_MPIAIJ, 1669 MatDestroy_MPIAIJ, 1670 MatView_MPIAIJ, 1671 MatGetPetscMaps_Petsc, 1672 0, 1673 /*65*/ 0, 1674 0, 1675 0, 1676 0, 1677 0, 1678 /*70*/ 0, 1679 0, 1680 MatSetColoring_MPIAIJ, 1681 MatSetValuesAdic_MPIAIJ, 1682 MatSetValuesAdifor_MPIAIJ, 1683 /*75*/ 0, 1684 0, 1685 0, 1686 0, 1687 0, 1688 /*80*/ 0, 1689 0, 1690 0, 1691 0, 1692 /*84*/ MatLoad_MPIAIJ, 1693 0, 1694 0, 1695 0, 1696 0, 1697 0, 1698 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1699 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1700 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1701 MatPtAP_MPIAIJ_MPIAIJ, 1702 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1703 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1704 0, 1705 0, 1706 0}; 1707 1708 /* ----------------------------------------------------------------------------------------*/ 1709 1710 EXTERN_C_BEGIN 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1713 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1714 { 1715 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1720 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 EXTERN_C_BEGIN 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1728 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1729 { 1730 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1735 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 EXTERN_C_END 1739 1740 #include "petscpc.h" 1741 EXTERN_C_BEGIN 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1744 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1745 { 1746 Mat_MPIAIJ *b; 1747 PetscErrorCode ierr; 1748 PetscInt i; 1749 1750 PetscFunctionBegin; 1751 B->preallocated = PETSC_TRUE; 1752 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1753 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1754 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1755 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1756 if (d_nnz) { 1757 for (i=0; i<B->m; i++) { 1758 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1759 } 1760 } 1761 if (o_nnz) { 1762 for (i=0; i<B->m; i++) { 1763 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1764 } 1765 } 1766 b = (Mat_MPIAIJ*)B->data; 1767 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1768 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1769 1770 PetscFunctionReturn(0); 1771 } 1772 EXTERN_C_END 1773 1774 /*MC 1775 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1776 1777 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1778 and MATMPIAIJ otherwise. 
As a result, for single process communicators, 1779 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1780 for communicators controlling multiple processes. It is recommended that you call both of 1781 the above preallocation routines for simplicity. 1782 1783 Options Database Keys: 1784 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1785 1786 Level: beginner 1787 1788 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1789 M*/ 1790 1791 EXTERN_C_BEGIN 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "MatCreate_AIJ" 1794 PetscErrorCode MatCreate_AIJ(Mat A) 1795 { 1796 PetscErrorCode ierr; 1797 PetscMPIInt size; 1798 1799 PetscFunctionBegin; 1800 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1801 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1802 if (size == 1) { 1803 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1804 } else { 1805 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 EXTERN_C_END 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1813 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1814 { 1815 Mat mat; 1816 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 *newmat = 0; 1821 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1822 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1823 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1824 a = (Mat_MPIAIJ*)mat->data; 1825 1826 mat->factor = matin->factor; 1827 mat->assembled = PETSC_TRUE; 1828 mat->insertmode = NOT_SET_VALUES; 1829 mat->preallocated = PETSC_TRUE; 1830 1831 a->rstart = oldmat->rstart; 1832 a->rend = oldmat->rend; 1833 a->cstart = oldmat->cstart; 1834 a->cend = oldmat->cend; 1835 a->size = oldmat->size; 1836 a->rank = oldmat->rank; 1837 a->donotstash = oldmat->donotstash; 1838 a->roworiented = oldmat->roworiented; 1839 a->rowindices = 0; 1840 a->rowvalues = 0; 1841 a->getrowactive = PETSC_FALSE; 1842 1843 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1844 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1845 if (oldmat->colmap) { 1846 #if defined (PETSC_USE_CTABLE) 1847 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1848 #else 1849 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1850 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1851 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1852 #endif 1853 } else a->colmap = 0; 1854 if (oldmat->garray) { 1855 PetscInt len; 1856 len = oldmat->B->n; 1857 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1858 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1859 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1860 } else a->garray = 0; 1861 1862 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1863 PetscLogObjectParent(mat,a->lvec); 1864 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1865 PetscLogObjectParent(mat,a->Mvctx); 1866 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1867 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1868 PetscLogObjectParent(mat,a->A); 1869 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1870 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1871 
PetscLogObjectParent(mat,a->B); 1872 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1873 *newmat = mat; 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #include "petscsys.h" 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "MatLoad_MPIAIJ" 1881 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1882 { 1883 Mat A; 1884 PetscScalar *vals,*svals; 1885 MPI_Comm comm = ((PetscObject)viewer)->comm; 1886 MPI_Status status; 1887 PetscErrorCode ierr; 1888 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz,mm; 1889 PetscInt i,nz,j,rstart,rend; 1890 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1891 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1892 PetscInt cend,cstart,n; 1893 int fd; 1894 1895 PetscFunctionBegin; 1896 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1897 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1898 if (!rank) { 1899 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1900 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1901 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1902 if (header[3] < 0) { 1903 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1904 } 1905 } 1906 1907 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1908 M = header[1]; N = header[2]; 1909 /* determine ownership of all rows */ 1910 m = M/size + ((M % size) > rank); 1911 mm = (PetscMPIInt) m; 1912 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1913 ierr = MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1914 rowners[0] = 0; 1915 for (i=2; i<=size; i++) { 1916 rowners[i] += rowners[i-1]; 1917 } 1918 rstart = rowners[rank]; 1919 rend = rowners[rank+1]; 1920 1921 /* distribute row lengths to all processors */ 1922 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1923 offlens = ourlens + (rend-rstart); 1924 if (!rank) { 1925 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1926 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1927 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1928 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1929 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1930 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1931 } else { 1932 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1933 } 1934 1935 if (!rank) { 1936 /* calculate the number of nonzeros on each processor */ 1937 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1938 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1939 for (i=0; i<size; i++) { 1940 for (j=rowners[i]; j< rowners[i+1]; j++) { 1941 procsnz[i] += rowlengths[j]; 1942 } 1943 } 1944 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1945 1946 /* determine max buffer needed and allocate it */ 1947 maxnz = 0; 1948 for (i=0; i<size; i++) { 1949 maxnz = PetscMax(maxnz,procsnz[i]); 1950 } 1951 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1952 1953 /* read in my part of the matrix column indices */ 1954 nz = procsnz[0]; 1955 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1956 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1957 1958 /* read in every one elses and ship off */ 1959 for (i=1; i<size; i++) { 1960 nz = procsnz[i]; 1961 
ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1962 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1963 } 1964 ierr = PetscFree(cols);CHKERRQ(ierr); 1965 } else { 1966 /* determine buffer space needed for message */ 1967 nz = 0; 1968 for (i=0; i<m; i++) { 1969 nz += ourlens[i]; 1970 } 1971 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1972 1973 /* receive message of column indices*/ 1974 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1975 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1976 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1977 } 1978 1979 /* determine column ownership if matrix is not square */ 1980 if (N != M) { 1981 n = N/size + ((N % size) > rank); 1982 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1983 cstart = cend - n; 1984 } else { 1985 cstart = rstart; 1986 cend = rend; 1987 n = cend - cstart; 1988 } 1989 1990 /* loop over local rows, determining number of off diagonal entries */ 1991 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1992 jj = 0; 1993 for (i=0; i<m; i++) { 1994 for (j=0; j<ourlens[i]; j++) { 1995 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1996 jj++; 1997 } 1998 } 1999 2000 /* create our matrix */ 2001 for (i=0; i<m; i++) { 2002 ourlens[i] -= offlens[i]; 2003 } 2004 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2005 ierr = MatSetType(A,type);CHKERRQ(ierr); 2006 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2007 2008 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2009 for (i=0; i<m; i++) { 2010 ourlens[i] += offlens[i]; 2011 } 2012 2013 if (!rank) { 2014 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2015 2016 /* read in my part of the matrix numerical values */ 2017 nz = procsnz[0]; 2018 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2019 2020 /* insert into matrix */ 2021 jj = rstart; 2022 smycols = mycols; 2023 svals = vals; 2024 for (i=0; i<m; i++) { 2025 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2026 smycols += ourlens[i]; 2027 svals += ourlens[i]; 2028 jj++; 2029 } 2030 2031 /* read in other processors and ship out */ 2032 for (i=1; i<size; i++) { 2033 nz = procsnz[i]; 2034 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2035 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2036 } 2037 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2038 } else { 2039 /* receive numeric values */ 2040 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2041 2042 /* receive message of values*/ 2043 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2044 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2045 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2046 2047 /* insert into matrix */ 2048 jj = rstart; 2049 smycols = mycols; 2050 svals = vals; 2051 for (i=0; i<m; i++) { 2052 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2053 smycols += ourlens[i]; 2054 svals += ourlens[i]; 2055 jj++; 2056 } 2057 } 2058 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2059 ierr = PetscFree(vals);CHKERRQ(ierr); 2060 ierr = PetscFree(mycols);CHKERRQ(ierr); 2061 ierr = PetscFree(rowners);CHKERRQ(ierr); 2062 2063 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2064 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 *newmat = A; 2066 
PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2071 /* 2072 Not great since it makes two copies of the submatrix, first an SeqAIJ 2073 in local and then by concatenating the local matrices the end result. 2074 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2075 */ 2076 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2077 { 2078 PetscErrorCode ierr; 2079 PetscMPIInt rank,size; 2080 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2081 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2082 Mat *local,M,Mreuse; 2083 PetscScalar *vwork,*aa; 2084 MPI_Comm comm = mat->comm; 2085 Mat_SeqAIJ *aij; 2086 2087 2088 PetscFunctionBegin; 2089 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2090 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2091 2092 if (call == MAT_REUSE_MATRIX) { 2093 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2094 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2095 local = &Mreuse; 2096 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2097 } else { 2098 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2099 Mreuse = *local; 2100 ierr = PetscFree(local);CHKERRQ(ierr); 2101 } 2102 2103 /* 2104 m - number of local rows 2105 n - number of columns (same on all processors) 2106 rstart - first row in new global matrix generated 2107 */ 2108 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2109 if (call == MAT_INITIAL_MATRIX) { 2110 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2111 ii = aij->i; 2112 jj = aij->j; 2113 2114 /* 2115 Determine the number of non-zeros in the diagonal and off-diagonal 2116 portions of the matrix in order to do correct preallocation 2117 */ 2118 2119 /* first get start and end of "diagonal" columns */ 2120 if (csize == PETSC_DECIDE) { 2121 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2122 if (mglobal == n) { /* square matrix */ 2123 nlocal = m; 2124 } else { 2125 nlocal = n/size + ((n % size) > rank); 2126 } 2127 } else { 2128 nlocal = csize; 2129 } 2130 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2131 rstart = rend - nlocal; 2132 if (rank == size - 1 && rend != n) { 2133 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2134 } 2135 2136 /* next, compute all the lengths */ 2137 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2138 olens = dlens + m; 2139 for (i=0; i<m; i++) { 2140 jend = ii[i+1] - ii[i]; 2141 olen = 0; 2142 dlen = 0; 2143 for (j=0; j<jend; j++) { 2144 if (*jj < rstart || *jj >= rend) olen++; 2145 else dlen++; 2146 jj++; 2147 } 2148 olens[i] = olen; 2149 dlens[i] = dlen; 2150 } 2151 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2152 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2153 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2154 ierr = PetscFree(dlens);CHKERRQ(ierr); 2155 } else { 2156 PetscInt ml,nl; 2157 2158 M = *newmat; 2159 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2160 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2161 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2162 /* 2163 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2164 rather than the slower MatSetValues(). 
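      Setting was_assembled and clearing assembled below puts the matrix into the
      state that MatSetValues_MPIAIJ() expects when values are reinserted into an
      already existing nonzero pattern.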
2165 */ 2166 M->was_assembled = PETSC_TRUE; 2167 M->assembled = PETSC_FALSE; 2168 } 2169 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2170 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2171 ii = aij->i; 2172 jj = aij->j; 2173 aa = aij->a; 2174 for (i=0; i<m; i++) { 2175 row = rstart + i; 2176 nz = ii[i+1] - ii[i]; 2177 cwork = jj; jj += nz; 2178 vwork = aa; aa += nz; 2179 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2180 } 2181 2182 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2184 *newmat = M; 2185 2186 /* save submatrix used in processor for next request */ 2187 if (call == MAT_INITIAL_MATRIX) { 2188 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2189 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2190 } 2191 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2197 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2198 { 2199 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2200 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2201 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2202 const PetscInt *JJ; 2203 PetscScalar *values; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 #if defined(PETSC_OPT_g) 2208 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2209 #endif 2210 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2211 o_nnz = d_nnz + m; 2212 2213 for (i=0; i<m; i++) { 2214 nnz = I[i+1]- I[i]; 2215 JJ = J + I[i]; 2216 nnz_max = PetscMax(nnz_max,nnz); 2217 #if defined(PETSC_OPT_g) 2218 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2219 #endif 2220 for (j=0; j<nnz; j++) { 2221 if (*JJ >= cstart) break; 2222 JJ++; 2223 } 2224 d = 0; 2225 for (; j<nnz; j++) { 2226 if (*JJ++ >= cend) break; 2227 d++; 2228 } 2229 d_nnz[i] = d; 2230 o_nnz[i] = nnz - d; 2231 } 2232 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2233 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2234 2235 if (v) values = (PetscScalar*)v; 2236 else { 2237 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2238 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2239 } 2240 2241 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2242 for (i=0; i<m; i++) { 2243 ii = i + rstart; 2244 nnz = I[i+1]- I[i]; 2245 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2246 } 2247 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2248 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2249 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2250 2251 if (!v) { 2252 ierr = PetscFree(values);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2259 /*@C 2260 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2261 (the default parallel PETSc format). 2262 2263 Collective on MPI_Comm 2264 2265 Input Parameters: 2266 + A - the matrix 2267 . i - the indices into j for the start of each local row (starts with zero) 2268 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2269 - v - optional values in the matrix 2270 2271 Level: developer 2272 2273 .keywords: matrix, aij, compressed row, sparse, parallel 2274 2275 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2276 @*/ 2277 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2278 { 2279 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2280 2281 PetscFunctionBegin; 2282 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2283 if (f) { 2284 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2285 } 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2291 /*@C 2292 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2293 (the default parallel PETSc format). For good matrix assembly performance 2294 the user should preallocate the matrix storage by setting the parameters 2295 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2296 performance can be increased by more than a factor of 50. 2297 2298 Collective on MPI_Comm 2299 2300 Input Parameters: 2301 + A - the matrix 2302 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2303 (same value is used for all local rows) 2304 . d_nnz - array containing the number of nonzeros in the various rows of the 2305 DIAGONAL portion of the local submatrix (possibly different for each row) 2306 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2307 The size of this array is equal to the number of local rows, i.e 'm'. 2308 You must leave room for the diagonal entry even if it is zero. 2309 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2310 submatrix (same value is used for all local rows). 2311 - o_nnz - array containing the number of nonzeros in the various rows of the 2312 OFF-DIAGONAL portion of the local submatrix (possibly different for 2313 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2314 structure. The size of this array is equal to the number 2315 of local rows, i.e 'm'. 2316 2317 The AIJ format (also called the Yale sparse matrix format or 2318 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2319 storage. The stored row and column indices begin with zero. See the users manual for details. 2320 2321 The parallel matrix is partitioned such that the first m0 rows belong to 2322 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2323 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2324 2325 The DIAGONAL portion of the local submatrix of a processor can be defined 2326 as the submatrix which is obtained by extraction the part corresponding 2327 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2328 first row that belongs to the processor, and r2 is the last row belonging 2329 to the this processor. This is a square mxm matrix. The remaining portion 2330 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2331 2332 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2333 2334 Example usage: 2335 2336 Consider the following 8x8 matrix with 34 non-zero values, that is 2337 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 2338 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2339 as follows: 2340 2341 .vb 2342 1 2 0 | 0 3 0 | 0 4 2343 Proc0 0 5 6 | 7 0 0 | 8 0 2344 9 0 10 | 11 0 0 | 12 0 2345 ------------------------------------- 2346 13 0 14 | 15 16 17 | 0 0 2347 Proc1 0 18 0 | 19 20 21 | 0 0 2348 0 0 0 | 22 23 0 | 24 0 2349 ------------------------------------- 2350 Proc2 25 26 27 | 0 0 28 | 29 0 2351 30 0 0 | 31 32 33 | 0 34 2352 .ve 2353 2354 This can be represented as a collection of submatrices as: 2355 2356 .vb 2357 A B C 2358 D E F 2359 G H I 2360 .ve 2361 2362 Where the submatrices A,B,C are owned by proc0, D,E,F are 2363 owned by proc1, G,H,I are owned by proc2. 2364 2365 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2366 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2367 The 'M','N' parameters are 8,8, and have the same values on all procs. 2368 2369 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2370 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2371 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2372 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2373 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2374 matrix, ans [DF] as another SeqAIJ matrix. 2375 2376 When d_nz, o_nz parameters are specified, d_nz storage elements are 2377 allocated for every row of the local diagonal submatrix, and o_nz 2378 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2379 One way to choose d_nz and o_nz is to use the max nonzerors per local 2380 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2381 In this case, the values of d_nz,o_nz are: 2382 .vb 2383 proc0 : dnz = 2, o_nz = 2 2384 proc1 : dnz = 3, o_nz = 2 2385 proc2 : dnz = 1, o_nz = 4 2386 .ve 2387 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2388 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2389 for proc3. i.e we are using 12+15+10=37 storage locations to store 2390 34 values. 2391 2392 When d_nnz, o_nnz parameters are specified, the storage is specified 2393 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2394 In the above case the values for d_nnz,o_nnz are: 2395 .vb 2396 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2397 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2398 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2399 .ve 2400 Here the space allocated is sum of all the above values i.e 34, and 2401 hence pre-allocation is perfect. 2402 2403 Level: intermediate 2404 2405 .keywords: matrix, aij, compressed row, sparse, parallel 2406 2407 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2408 MPIAIJ 2409 @*/ 2410 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2411 { 2412 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2413 2414 PetscFunctionBegin; 2415 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2416 if (f) { 2417 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2418 } 2419 PetscFunctionReturn(0); 2420 } 2421 2422 #undef __FUNCT__ 2423 #define __FUNCT__ "MatCreateMPIAIJ" 2424 /*@C 2425 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2426 (the default parallel PETSc format). 
For good matrix assembly performance 2427 the user should preallocate the matrix storage by setting the parameters 2428 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2429 performance can be increased by more than a factor of 50. 2430 2431 Collective on MPI_Comm 2432 2433 Input Parameters: 2434 + comm - MPI communicator 2435 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2436 This value should be the same as the local size used in creating the 2437 y vector for the matrix-vector product y = Ax. 2438 . n - This value should be the same as the local size used in creating the 2439 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2440 calculated if N is given) For square matrices n is almost always m. 2441 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2442 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2443 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2444 (same value is used for all local rows) 2445 . d_nnz - array containing the number of nonzeros in the various rows of the 2446 DIAGONAL portion of the local submatrix (possibly different for each row) 2447 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2448 The size of this array is equal to the number of local rows, i.e 'm'. 2449 You must leave room for the diagonal entry even if it is zero. 2450 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2451 submatrix (same value is used for all local rows). 2452 - o_nnz - array containing the number of nonzeros in the various rows of the 2453 OFF-DIAGONAL portion of the local submatrix (possibly different for 2454 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2455 structure. The size of this array is equal to the number 2456 of local rows, i.e 'm'. 2457 2458 Output Parameter: 2459 . A - the matrix 2460 2461 Notes: 2462 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2463 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2464 storage requirements for this matrix. 2465 2466 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2467 processor than it must be used on all processors that share the object for 2468 that argument. 2469 2470 The user MUST specify either the local or global matrix dimensions 2471 (possibly both). 2472 2473 The parallel matrix is partitioned such that the first m0 rows belong to 2474 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2475 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2476 2477 The DIAGONAL portion of the local submatrix of a processor can be defined 2478 as the submatrix which is obtained by extraction the part corresponding 2479 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2480 first row that belongs to the processor, and r2 is the last row belonging 2481 to the this processor. This is a square mxm matrix. The remaining portion 2482 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2483 2484 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2485 2486 When calling this routine with a single process communicator, a matrix of 2487 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 2488 type of communicator, use the construction mechanism: 2489 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2490 2491 By default, this format uses inodes (identical nodes) when possible. 2492 We search for consecutive rows with the same nonzero structure, thereby 2493 reusing matrix information to achieve increased efficiency. 2494 2495 Options Database Keys: 2496 + -mat_aij_no_inode - Do not use inodes 2497 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2498 - -mat_aij_oneindex - Internally use indexing starting at 1 2499 rather than 0. Note that when calling MatSetValues(), 2500 the user still MUST index entries starting at 0! 2501 2502 2503 Example usage: 2504 2505 Consider the following 8x8 matrix with 34 non-zero values, that is 2506 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2507 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2508 as follows: 2509 2510 .vb 2511 1 2 0 | 0 3 0 | 0 4 2512 Proc0 0 5 6 | 7 0 0 | 8 0 2513 9 0 10 | 11 0 0 | 12 0 2514 ------------------------------------- 2515 13 0 14 | 15 16 17 | 0 0 2516 Proc1 0 18 0 | 19 20 21 | 0 0 2517 0 0 0 | 22 23 0 | 24 0 2518 ------------------------------------- 2519 Proc2 25 26 27 | 0 0 28 | 29 0 2520 30 0 0 | 31 32 33 | 0 34 2521 .ve 2522 2523 This can be represented as a collection of submatrices as: 2524 2525 .vb 2526 A B C 2527 D E F 2528 G H I 2529 .ve 2530 2531 Where the submatrices A,B,C are owned by proc0, D,E,F are 2532 owned by proc1, G,H,I are owned by proc2. 2533 2534 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2535 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2536 The 'M','N' parameters are 8,8, and have the same values on all procs. 2537 2538 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2539 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2540 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2541 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2542 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2543 matrix, ans [DF] as another SeqAIJ matrix. 2544 2545 When d_nz, o_nz parameters are specified, d_nz storage elements are 2546 allocated for every row of the local diagonal submatrix, and o_nz 2547 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2548 One way to choose d_nz and o_nz is to use the max nonzerors per local 2549 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2550 In this case, the values of d_nz,o_nz are: 2551 .vb 2552 proc0 : dnz = 2, o_nz = 2 2553 proc1 : dnz = 3, o_nz = 2 2554 proc2 : dnz = 1, o_nz = 4 2555 .ve 2556 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2557 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2558 for proc3. i.e we are using 12+15+10=37 storage locations to store 2559 34 values. 2560 2561 When d_nnz, o_nnz parameters are specified, the storage is specified 2562 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2563 In the above case the values for d_nnz,o_nnz are: 2564 .vb 2565 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2566 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2567 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2568 .ve 2569 Here the space allocated is sum of all the above values i.e 34, and 2570 hence pre-allocation is perfect. 
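
   A minimal calling sketch for the 8x8 example above, as seen from proc0
   (each process passes its own local sizes and nnz arrays):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve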
2571 2572 Level: intermediate 2573 2574 .keywords: matrix, aij, compressed row, sparse, parallel 2575 2576 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2577 MPIAIJ 2578 @*/ 2579 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2580 { 2581 PetscErrorCode ierr; 2582 PetscMPIInt size; 2583 2584 PetscFunctionBegin; 2585 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2586 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2587 if (size > 1) { 2588 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2589 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2590 } else { 2591 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2592 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2593 } 2594 PetscFunctionReturn(0); 2595 } 2596 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2599 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2600 { 2601 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2602 2603 PetscFunctionBegin; 2604 *Ad = a->A; 2605 *Ao = a->B; 2606 *colmap = a->garray; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 #undef __FUNCT__ 2611 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2612 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2613 { 2614 PetscErrorCode ierr; 2615 PetscInt i; 2616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2617 2618 PetscFunctionBegin; 2619 if (coloring->ctype == IS_COLORING_LOCAL) { 2620 ISColoringValue *allcolors,*colors; 2621 ISColoring ocoloring; 2622 2623 /* set coloring for diagonal portion */ 2624 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2625 2626 /* set coloring for off-diagonal portion */ 2627 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2628 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2629 for (i=0; i<a->B->n; i++) { 2630 colors[i] = allcolors[a->garray[i]]; 2631 } 2632 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2633 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2634 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2635 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2636 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2637 ISColoringValue *colors; 2638 PetscInt *larray; 2639 ISColoring ocoloring; 2640 2641 /* set coloring for diagonal portion */ 2642 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2643 for (i=0; i<a->A->n; i++) { 2644 larray[i] = i + a->cstart; 2645 } 2646 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2647 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2648 for (i=0; i<a->A->n; i++) { 2649 colors[i] = coloring->colors[larray[i]]; 2650 } 2651 ierr = PetscFree(larray);CHKERRQ(ierr); 2652 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2653 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2654 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2655 2656 /* set coloring for off-diagonal portion */ 2657 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2658 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2659 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 
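    /* gather the color of each ghost column: garray[] was mapped to ghosted local indices in larray[] above */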
2660 for (i=0; i<a->B->n; i++) { 2661 colors[i] = coloring->colors[larray[i]]; 2662 } 2663 ierr = PetscFree(larray);CHKERRQ(ierr); 2664 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2665 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2666 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2667 } else { 2668 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2669 } 2670 2671 PetscFunctionReturn(0); 2672 } 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2676 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2677 { 2678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2679 PetscErrorCode ierr; 2680 2681 PetscFunctionBegin; 2682 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2683 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2684 PetscFunctionReturn(0); 2685 } 2686 2687 #undef __FUNCT__ 2688 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2689 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2690 { 2691 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2692 PetscErrorCode ierr; 2693 2694 PetscFunctionBegin; 2695 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2696 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2697 PetscFunctionReturn(0); 2698 } 2699 2700 #undef __FUNCT__ 2701 #define __FUNCT__ "MatMerge" 2702 /*@C 2703 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2704 matrices from each processor 2705 2706 Collective on MPI_Comm 2707 2708 Input Parameters: 2709 + comm - the communicators the parallel matrix will live on 2710 . inmat - the input sequential matrices 2711 . n - number of local columns (or PETSC_DECIDE) 2712 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2713 2714 Output Parameter: 2715 . outmat - the parallel matrix generated 2716 2717 Level: advanced 2718 2719 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
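
   Example (a minimal sketch; Aloc stands for the local sequential matrix
   already assembled on each process):
.vb
     Mat C;
     ierr = MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve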
2720 2721 @*/ 2722 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2723 { 2724 PetscErrorCode ierr; 2725 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2726 const PetscInt *indx; 2727 const PetscScalar *values; 2728 PetscMap columnmap,rowmap; 2729 2730 PetscFunctionBegin; 2731 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2732 /* 2733 PetscMPIInt rank; 2734 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2735 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2736 */ 2737 if (scall == MAT_INITIAL_MATRIX){ 2738 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2739 if (n == PETSC_DECIDE){ 2740 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2741 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2742 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2743 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2744 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2745 } 2746 2747 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2748 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2749 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2750 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2751 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2752 2753 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2754 for (i=0;i<m;i++) { 2755 ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2756 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2757 ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2758 } 2759 /* This routine will ONLY return MPIAIJ type matrix */ 2760 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2761 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2762 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2763 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2764 2765 } else if (scall == MAT_REUSE_MATRIX){ 2766 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2767 } else { 2768 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2769 } 2770 2771 for (i=0;i<m;i++) { 2772 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2773 I = i + rstart; 2774 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2775 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2776 } 2777 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2778 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2779 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2780 2781 PetscFunctionReturn(0); 2782 } 2783 2784 #undef __FUNCT__ 2785 #define __FUNCT__ "MatFileSplit" 2786 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2787 { 2788 PetscErrorCode ierr; 2789 PetscMPIInt rank; 2790 PetscInt m,N,i,rstart,nnz; 2791 size_t len; 2792 const PetscInt *indx; 2793 PetscViewer out; 2794 char *name; 2795 Mat B; 2796 const PetscScalar *values; 2797 2798 PetscFunctionBegin; 2799 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2800 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2801 /* Should this be the type of the diagonal block of A? 
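      For now the per-process piece written to file is always created as SEQAIJ.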
*/ 2802 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2803 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2804 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2805 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2806 for (i=0;i<m;i++) { 2807 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2808 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2809 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2810 } 2811 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2813 2814 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2815 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2816 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2817 sprintf(name,"%s.%d",outfile,rank); 2818 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2819 ierr = PetscFree(name); 2820 ierr = MatView(B,out);CHKERRQ(ierr); 2821 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2822 ierr = MatDestroy(B);CHKERRQ(ierr); 2823 PetscFunctionReturn(0); 2824 } 2825 2826 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2827 #undef __FUNCT__ 2828 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2829 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2830 { 2831 PetscErrorCode ierr; 2832 Mat_Merge_SeqsToMPI *merge; 2833 PetscObjectContainer container; 2834 2835 PetscFunctionBegin; 2836 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2837 if (container) { 2838 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2839 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2840 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2841 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2842 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2843 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2844 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2845 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2846 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2847 if (merge->ci){ierr = PetscFree(merge->ci);CHKERRQ(ierr);} 2848 if (merge->cj){ierr = PetscFree(merge->cj);CHKERRQ(ierr);} 2849 if (merge->C_seq){ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr);} 2850 2851 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2852 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2853 } 2854 ierr = PetscFree(merge);CHKERRQ(ierr); 2855 2856 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2857 PetscFunctionReturn(0); 2858 } 2859 2860 #include "src/mat/utils/freespace.h" 2861 #include "petscbt.h" 2862 #undef __FUNCT__ 2863 #define __FUNCT__ "MatMerge_SeqsToMPI" 2864 /*@C 2865 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2866 matrices from each processor 2867 2868 Collective on MPI_Comm 2869 2870 Input Parameters: 2871 + comm - the communicators the parallel matrix will live on 2872 . seqmat - the input sequential matrices 2873 . m - number of local rows (or PETSC_DECIDE) 2874 . n - number of local columns (or PETSC_DECIDE) 2875 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2876 2877 Output Parameter: 2878 . mpimat - the parallel matrix generated 2879 2880 Level: advanced 2881 2882 Notes: 2883 The dimensions of the sequential matrix in each processor MUST be the same. 2884 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2885 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
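
   Example (a minimal sketch based on the parameters listed above; seqmat is
   the local sequential matrix held by each process):
.vb
     Mat C;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve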
2886 @*/ 2887 static PetscEvent logkey_seqstompinum = 0; 2888 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2889 { 2890 PetscErrorCode ierr; 2891 MPI_Comm comm=mpimat->comm; 2892 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2893 PetscMPIInt size,rank,taga,*len_s; 2894 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2895 PetscInt proc,m; 2896 PetscInt **buf_ri,**buf_rj; 2897 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2898 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2899 MPI_Request *s_waits,*r_waits; 2900 MPI_Status *status; 2901 MatScalar *aa=a->a,**abuf_r,*ba_i; 2902 Mat_Merge_SeqsToMPI *merge; 2903 PetscObjectContainer container; 2904 2905 PetscFunctionBegin; 2906 if (!logkey_seqstompinum) { 2907 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2908 } 2909 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2910 2911 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2912 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2913 2914 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2915 if (container) { 2916 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2917 } 2918 bi = merge->bi; 2919 bj = merge->bj; 2920 buf_ri = merge->buf_ri; 2921 buf_rj = merge->buf_rj; 2922 2923 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2924 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2925 len_s = merge->len_s; 2926 2927 /* send and recv matrix values */ 2928 /*-----------------------------*/ 2929 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2930 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2931 2932 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2933 for (proc=0,k=0; proc<size; proc++){ 2934 if (!len_s[proc]) continue; 2935 i = owners[proc]; 2936 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2937 k++; 2938 } 2939 2940 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2941 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2942 ierr = PetscFree(status);CHKERRQ(ierr); 2943 2944 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2945 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2946 2947 /* insert mat values of mpimat */ 2948 /*----------------------------*/ 2949 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2950 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2951 nextrow = buf_ri_k + merge->nrecv; 2952 nextai = nextrow + merge->nrecv; 2953 2954 for (k=0; k<merge->nrecv; k++){ 2955 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2956 nrows = *(buf_ri_k[k]); 2957 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2958 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2959 } 2960 2961 /* set values of ba */ 2962 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2963 for (i=0; i<m; i++) { 2964 arow = owners[rank] + i; 2965 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2966 bnzi = bi[i+1] - bi[i]; 2967 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2968 2969 /* add local non-zero vals of this proc's seqmat into ba */ 2970 anzi = ai[arow+1] - ai[arow]; 2971 aj = a->j + ai[arow]; 2972 aa = a->a + ai[arow]; 2973 nextaj = 0; 2974 for 
(j=0; nextaj<anzi; j++){ 2975 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2976 ba_i[j] += aa[nextaj++]; 2977 } 2978 } 2979 2980 /* add received vals into ba */ 2981 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2982 /* i-th row */ 2983 if (i == *nextrow[k]) { 2984 anzi = *(nextai[k]+1) - *nextai[k]; 2985 aj = buf_rj[k] + *(nextai[k]); 2986 aa = abuf_r[k] + *(nextai[k]); 2987 nextaj = 0; 2988 for (j=0; nextaj<anzi; j++){ 2989 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2990 ba_i[j] += aa[nextaj++]; 2991 } 2992 } 2993 nextrow[k]++; nextai[k]++; 2994 } 2995 } 2996 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2997 } 2998 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 3001 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3002 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3003 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3004 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3005 PetscFunctionReturn(0); 3006 } 3007 static PetscEvent logkey_seqstompisym = 0; 3008 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3009 { 3010 PetscErrorCode ierr; 3011 Mat B_mpi; 3012 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3013 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3014 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3015 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3016 PetscInt len,proc,*dnz,*onz; 3017 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3018 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3019 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3020 MPI_Status *status; 3021 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3022 PetscBT lnkbt; 3023 Mat_Merge_SeqsToMPI *merge; 3024 PetscObjectContainer container; 3025 3026 PetscFunctionBegin; 3027 if (!logkey_seqstompisym) { 3028 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3029 } 3030 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3031 3032 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3033 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3034 3035 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3036 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3037 3038 /* determine row ownership */ 3039 /*---------------------------------------------------------*/ 3040 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3041 if (m == PETSC_DECIDE) { 3042 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3043 } else { 3044 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3045 } 3046 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3047 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3048 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3049 3050 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3051 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3052 3053 /* determine the number of messages to send, their lengths */ 3054 /*---------------------------------------------------------*/ 3055 len_s = merge->len_s; 3056 3057 len = 0; /* length of buf_si[] */ 3058 merge->nsend = 0; 3059 for (proc=0; proc<size; proc++){ 3060 len_si[proc] = 0; 3061 if (proc == rank){ 3062 len_s[proc] = 0; 3063 } else { 3064 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3065 
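      /* len_s[proc] below is the number of nonzero (column index) entries headed for [proc];
         len_si[proc] is recomputed further down as the length of the matching i-structure message */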
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3066 } 3067 if (len_s[proc]) { 3068 merge->nsend++; 3069 nrows = 0; 3070 for (i=owners[proc]; i<owners[proc+1]; i++){ 3071 if (ai[i+1] > ai[i]) nrows++; 3072 } 3073 len_si[proc] = 2*(nrows+1); 3074 len += len_si[proc]; 3075 } 3076 } 3077 3078 /* determine the number and length of messages to receive for ij-structure */ 3079 /*-------------------------------------------------------------------------*/ 3080 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3081 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3082 3083 /* post the Irecv of j-structure */ 3084 /*-------------------------------*/ 3085 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3086 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3087 3088 /* post the Isend of j-structure */ 3089 /*--------------------------------*/ 3090 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3091 sj_waits = si_waits + merge->nsend; 3092 3093 for (proc=0, k=0; proc<size; proc++){ 3094 if (!len_s[proc]) continue; 3095 i = owners[proc]; 3096 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3097 k++; 3098 } 3099 3100 /* receives and sends of j-structure are complete */ 3101 /*------------------------------------------------*/ 3102 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3103 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3104 3105 /* send and recv i-structure */ 3106 /*---------------------------*/ 3107 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3108 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3109 3110 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3111 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3112 for (proc=0,k=0; proc<size; proc++){ 3113 if (!len_s[proc]) continue; 3114 /* form outgoing message for i-structure: 3115 buf_si[0]: nrows to be sent 3116 [1:nrows]: row index (global) 3117 [nrows+1:2*nrows+1]: i-structure index 3118 */ 3119 /*-------------------------------------------*/ 3120 nrows = len_si[proc]/2 - 1; 3121 buf_si_i = buf_si + nrows+1; 3122 buf_si[0] = nrows; 3123 buf_si_i[0] = 0; 3124 nrows = 0; 3125 for (i=owners[proc]; i<owners[proc+1]; i++){ 3126 anzi = ai[i+1] - ai[i]; 3127 if (anzi) { 3128 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3129 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3130 nrows++; 3131 } 3132 } 3133 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3134 k++; 3135 buf_si += len_si[proc]; 3136 } 3137 3138 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3139 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3140 3141 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3142 for (i=0; i<merge->nrecv; i++){ 3143 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3144 } 3145 3146 ierr = PetscFree(len_si);CHKERRQ(ierr); 3147 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3148 ierr = 
PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  if (n==PETSC_DECIDE) {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3235 3236 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3237 B_mpi->assembled = PETSC_FALSE; 3238 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3239 merge->bi = bi; 3240 merge->bj = bj; 3241 merge->buf_ri = buf_ri; 3242 merge->buf_rj = buf_rj; 3243 merge->C_seq = seqmat; 3244 merge->ci = PETSC_NULL; 3245 merge->cj = PETSC_NULL; 3246 3247 /* attach the supporting struct to B_mpi for reuse */ 3248 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3249 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3250 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3251 *mpimat = B_mpi; 3252 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3253 PetscFunctionReturn(0); 3254 } 3255 3256 static PetscEvent logkey_seqstompi = 0; 3257 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3258 { 3259 PetscErrorCode ierr; 3260 3261 PetscFunctionBegin; 3262 if (!logkey_seqstompi) { 3263 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3264 } 3265 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3266 if (scall == MAT_INITIAL_MATRIX){ 3267 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3268 } 3269 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3270 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3271 PetscFunctionReturn(0); 3272 } 3273 static PetscEvent logkey_getlocalmat = 0; 3274 #undef __FUNCT__ 3275 #define __FUNCT__ "MatGetLocalMat" 3276 /*@C 3277 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3278 3279 Not Collective 3280 3281 Input Parameters: 3282 + A - the matrix 3283 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3284 3285 Output Parameter: 3286 . 
A_loc - the local sequential matrix generated 3287 3288 Level: developer 3289 3290 @*/ 3291 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3292 { 3293 PetscErrorCode ierr; 3294 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3295 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3296 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3297 MatScalar *aa=a->a,*ba=b->a,*ca; 3298 PetscInt am=A->m,i,j,k,*nnz,nnz_max,ncols,cstart=mpimat->cstart; 3299 Mat Aw; 3300 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3301 PetscInt stages[2]; 3302 3303 PetscFunctionBegin; 3304 ierr = PetscLogStageRegister(&stages[0],"LocalMat_init");CHKERRQ(ierr); 3305 ierr = PetscLogStageRegister(&stages[1],"LocalMat_ruse");CHKERRQ(ierr); 3306 3307 if (!logkey_getlocalmat) { 3308 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3309 } 3310 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3311 if (scall == MAT_INITIAL_MATRIX){ 3312 ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr); 3313 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3314 nnz_max = 0; 3315 for (i=0; i<am; i++){ 3316 nnz[i] = (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3317 if (nnz[i] > nnz_max) nnz_max = nnz[i]; 3318 } 3319 ierr = MatCreate(PETSC_COMM_SELF,am,A->N,am,A->N,&Aw);CHKERRQ(ierr); 3320 ierr = MatSetType(Aw,MATSEQAIJ);CHKERRQ(ierr); 3321 ierr = MatSeqAIJSetPreallocation(Aw,0,nnz);CHKERRQ(ierr); 3322 ierr = MatSetOption(Aw,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 3323 3324 ierr = PetscMalloc((1+nnz_max)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3325 ierr = PetscMalloc((1+nnz_max)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 3326 for (i=0; i<am; i++) { 3327 ncols_o = bi[i+1] - bi[i]; 3328 ncols_d = ai[i+1] - ai[i]; 3329 ncols = ncols_d + ncols_o; 3330 k = 0; 3331 /* off-diagonal portion of A */ 3332 for (jo=0; jo<ncols_o; jo++) { 3333 col = cmap[*bj]; 3334 if (col >= cstart) break; 3335 cj[k] = col; bj++; 3336 ca[k++] = *ba++; 3337 } 3338 /* diagonal portion of A */ 3339 for (j=0; j<ncols_d; j++) { 3340 cj[k] = cstart + *aj++; 3341 ca[k++] = *aa++; 3342 } 3343 /* off-diagonal portion of A */ 3344 for (j=jo;j<ncols_o; j++) { 3345 cj[k] = cmap[*bj++]; 3346 ca[k++] = *ba++; 3347 } 3348 ierr = MatSetValues_SeqAIJ(Aw,1,&i,ncols,cj,ca,INSERT_VALUES);CHKERRQ(ierr); 3349 } 3350 ierr = MatAssemblyBegin(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3351 ierr = MatAssemblyEnd(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3352 ierr = PetscFree(cj);CHKERRQ(ierr); 3353 ierr = PetscFree(ca);CHKERRQ(ierr); 3354 ierr = PetscFree(nnz);CHKERRQ(ierr); 3355 *A_loc = Aw; 3356 ierr = PetscLogStagePop(); 3357 } else if (scall == MAT_REUSE_MATRIX){ 3358 ierr = PetscLogStagePush(stages[1]); 3359 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3360 ci = mat->i; cj = mat->j; ca = mat->a; 3361 for (i=0; i<am; i++) { 3362 /* off-diagonal portion of A */ 3363 ncols_o = bi[i+1] - bi[i]; 3364 for (jo=0; jo<ncols_o; jo++) { 3365 col = cmap[*bj]; 3366 if (col >= cstart) break; 3367 *ca++ = *ba++; 3368 } 3369 /* diagonal portion of A */ 3370 ncols = ai[i+1] - ai[i]; 3371 for (j=0; j<ncols; j++) *ca++ = *aa++; 3372 /* off-diagonal portion of A */ 3373 for (j=jo;j<ncols_o; j++) *ca++ = *ba++; 3374 } 3375 ierr = PetscLogStagePop(); 3376 } else { 3377 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3378 } 3379 3380 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3381 PetscFunctionReturn(0); 3382 } 3383 3384 static PetscEvent logkey_getlocalmatcondensed = 0; 3385 #undef __FUNCT__ 3386 
#define __FUNCT__ "MatGetLocalMatCondensed" 3387 /*@C 3388 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3389 3390 Not Collective 3391 3392 Input Parameters: 3393 + A - the matrix 3394 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3395 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3396 3397 Output Parameter: 3398 . A_loc - the local sequential matrix generated 3399 3400 Level: developer 3401 3402 @*/ 3403 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3404 { 3405 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3406 PetscErrorCode ierr; 3407 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3408 IS isrowa,iscola; 3409 Mat *aloc; 3410 3411 PetscFunctionBegin; 3412 if (!logkey_getlocalmatcondensed) { 3413 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3414 } 3415 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3416 if (!row){ 3417 start = a->rstart; end = a->rend; 3418 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3419 } else { 3420 isrowa = *row; 3421 } 3422 if (!col){ 3423 start = a->cstart; 3424 cmap = a->garray; 3425 nzA = a->A->n; 3426 nzB = a->B->n; 3427 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3428 ncols = 0; 3429 for (i=0; i<nzB; i++) { 3430 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3431 else break; 3432 } 3433 imark = i; 3434 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3435 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3436 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3437 ierr = PetscFree(idx);CHKERRQ(ierr); 3438 } else { 3439 iscola = *col; 3440 } 3441 if (scall != MAT_INITIAL_MATRIX){ 3442 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3443 aloc[0] = *A_loc; 3444 } 3445 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3446 *A_loc = aloc[0]; 3447 ierr = PetscFree(aloc);CHKERRQ(ierr); 3448 if (!row){ 3449 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3450 } 3451 if (!col){ 3452 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3453 } 3454 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3455 PetscFunctionReturn(0); 3456 } 3457 3458 static PetscEvent logkey_GetBrowsOfAcols = 0; 3459 #undef __FUNCT__ 3460 #define __FUNCT__ "MatGetBrowsOfAcols" 3461 /*@C 3462 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3463 3464 Collective on Mat 3465 3466 Input Parameters: 3467 + A,B - the matrices in mpiaij format 3468 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3469 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3470 3471 Output Parameter: 3472 + rowb, colb - index sets of rows and columns of B to extract 3473 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3474 - B_seq - the sequential matrix generated 3475 3476 Level: developer 3477 3478 @*/ 3479 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3480 { 3481 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3482 PetscErrorCode ierr; 3483 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3484 IS isrowb,iscolb; 3485 Mat *bseq; 3486 3487 PetscFunctionBegin; 3488 if (a->cstart != b->rstart || a->cend != b->rend){ 3489 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3490 } 3491 if (!logkey_GetBrowsOfAcols) { 3492 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3493 } 3494 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3495 3496 if (scall == MAT_INITIAL_MATRIX){ 3497 start = a->cstart; 3498 cmap = a->garray; 3499 nzA = a->A->n; 3500 nzB = a->B->n; 3501 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3502 ncols = 0; 3503 for (i=0; i<nzB; i++) { /* row < local row index */ 3504 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3505 else break; 3506 } 3507 imark = i; 3508 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3509 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3510 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3511 ierr = PetscFree(idx);CHKERRQ(ierr); 3512 *brstart = imark; 3513 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3514 } else { 3515 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3516 isrowb = *rowb; iscolb = *colb; 3517 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3518 bseq[0] = *B_seq; 3519 } 3520 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3521 *B_seq = bseq[0]; 3522 ierr = PetscFree(bseq);CHKERRQ(ierr); 3523 if (!rowb){ 3524 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3525 } else { 3526 *rowb = isrowb; 3527 } 3528 if (!colb){ 3529 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3530 } else { 3531 *colb = iscolb; 3532 } 3533 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3534 PetscFunctionReturn(0); 3535 } 3536 3537 static PetscEvent logkey_GetBrowsOfAocols = 0; 3538 #undef __FUNCT__ 3539 #define __FUNCT__ "MatGetBrowsOfAoCols" 3540 /*@C 3541 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3542 of the OFF-DIAGONAL portion of local A 3543 3544 Collective on Mat 3545 3546 Input Parameters: 3547 + A,B - the matrices in mpiaij format 3548 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3549 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3550 3551 Output Parameter: 3552 + rowb, colb - index sets of rows and columns of B to extract 3553 . brstart - row index of B_seq from which the remaining rows correspond to A's columns that are greater than cend. 
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzB,*cmap;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzB   = a->B->n;
    if (!nzB){ /* output a null matrix */
      *B_seq = PETSC_NULL;
      ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr  = PetscMalloc(nzB*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    *brstart = nzB;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    *brstart = i;
    for (i=*brstart; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);

    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3626 3627 Level: beginner 3628 3629 .seealso: MatCreateMPIAIJ 3630 M*/ 3631 3632 EXTERN_C_BEGIN 3633 #undef __FUNCT__ 3634 #define __FUNCT__ "MatCreate_MPIAIJ" 3635 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3636 { 3637 Mat_MPIAIJ *b; 3638 PetscErrorCode ierr; 3639 PetscInt i; 3640 PetscMPIInt size; 3641 3642 PetscFunctionBegin; 3643 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3644 3645 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3646 B->data = (void*)b; 3647 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3648 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3649 B->factor = 0; 3650 B->assembled = PETSC_FALSE; 3651 B->mapping = 0; 3652 3653 B->insertmode = NOT_SET_VALUES; 3654 b->size = size; 3655 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3656 3657 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3658 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3659 3660 /* the information in the maps duplicates the information computed below, eventually 3661 we should remove the duplicate information that is not contained in the maps */ 3662 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3663 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3664 3665 /* build local table of row and column ownerships */ 3666 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3667 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3668 b->cowners = b->rowners + b->size + 2; 3669 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3670 b->rowners[0] = 0; 3671 for (i=2; i<=b->size; i++) { 3672 b->rowners[i] += b->rowners[i-1]; 3673 } 3674 b->rstart = b->rowners[b->rank]; 3675 b->rend = b->rowners[b->rank+1]; 3676 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3677 b->cowners[0] = 0; 3678 for (i=2; i<=b->size; i++) { 3679 b->cowners[i] += b->cowners[i-1]; 3680 } 3681 b->cstart = b->cowners[b->rank]; 3682 b->cend = b->cowners[b->rank+1]; 3683 3684 /* build cache for off array entries formed */ 3685 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3686 b->donotstash = PETSC_FALSE; 3687 b->colmap = 0; 3688 b->garray = 0; 3689 b->roworiented = PETSC_TRUE; 3690 3691 /* stuff used for matrix vector multiply */ 3692 b->lvec = PETSC_NULL; 3693 b->Mvctx = PETSC_NULL; 3694 3695 /* stuff for MatGetRow() */ 3696 b->rowindices = 0; 3697 b->rowvalues = 0; 3698 b->getrowactive = PETSC_FALSE; 3699 3700 /* Explicitly create 2 MATSEQAIJ matrices. 
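     b->A will hold the "diagonal" block (entries in columns owned by this
     process) and b->B the "off-diagonal" block; after assembly the columns of
     b->B are stored in a compressed local ordering that is mapped back to
     global column numbers through b->garray. The nonzero preallocation for
     both is supplied later, e.g. by MatMPIAIJSetPreallocation().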
*/ 3701 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3702 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3703 PetscLogObjectParent(B,b->A); 3704 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3705 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3706 PetscLogObjectParent(B,b->B); 3707 3708 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3709 "MatStoreValues_MPIAIJ", 3710 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3711 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3712 "MatRetrieveValues_MPIAIJ", 3713 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3714 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3715 "MatGetDiagonalBlock_MPIAIJ", 3716 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3717 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3718 "MatIsTranspose_MPIAIJ", 3719 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3720 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3721 "MatMPIAIJSetPreallocation_MPIAIJ", 3722 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3723 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3724 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3725 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3726 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3727 "MatDiagonalScaleLocal_MPIAIJ", 3728 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3729 PetscFunctionReturn(0); 3730 } 3731 EXTERN_C_END 3732
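
/*
   Illustrative usage sketch (not part of the library source): merging the
   per-process SeqAIJ contributions into one parallel MPIAIJ matrix with
   MatMerge_SeqsToMPI(). It assumes each process has assembled a SeqAIJ
   matrix A_seq of global size M x N that holds its additive contribution
   to the final matrix; M, N and the preallocation guess below are
   placeholders.

     Mat            A_seq,A_mpi;
     PetscErrorCode ierr;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,5,PETSC_NULL,&A_seq);CHKERRQ(ierr);
     ...  fill A_seq with this process' contribution using MatSetValues()  ...
     ierr = MatAssemblyBegin(A_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,PETSC_DECIDE,
                               MAT_INITIAL_MATRIX,&A_mpi);CHKERRQ(ierr);

     ...  use A_mpi; if the nonzero pattern of A_seq is unchanged, a later
          merge can reuse the symbolic data via MAT_REUSE_MATRIX  ...

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,PETSC_DECIDE,
                               MAT_REUSE_MATRIX,&A_mpi);CHKERRQ(ierr);
     ierr = MatDestroy(A_mpi);CHKERRQ(ierr);
*/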