#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable
  at a slightly higher hash table cost; without it, it is not scalable
  (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
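
/*
  Lookup convention (commentary added for clarity): colmap stores the local
  index plus one, so a result of zero means "global column not present in the
  off-diagonal part".  A minimal sketch of the lookup, mirroring the code in
  MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; ExampleColmapLookup
  is a hypothetical name, kept out of the build with #if 0:
*/
#if 0
static PetscErrorCode ExampleColmapLookup(Mat_MPIAIJ *aij,PetscInt gcol,PetscInt *lcol)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                       /* 0 (not found) becomes -1 */
#else
  *lcol = aij->colmap[gcol] - 1;   /* -1 when gcol is absent */
#endif
  PetscFunctionReturn(0);
}
#endif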

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
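
/*
  Note on the two macros above (commentary added for clarity): each insertion
  first narrows the search range with a bisection loop until fewer than six
  candidates remain, then scans linearly for the column.  If the row is full
  and new nonzeros are allowed, the entire matrix storage is reallocated with
  CHUNKSIZE extra slots for this row; correct preallocation avoids this
  expensive malloc/memcpy path entirely.
*/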

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  PetscInt    *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt    nonew = a->nonew,shift = 0;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
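
/*
  Usage sketch (illustrative, not part of the original file; kept out of the
  build with #if 0): values whose rows fall outside [rstart,rend) are stashed
  by the routine above and only shipped to their owner during assembly, so a
  caller may set any global entry but must then assemble:
*/
#if 0
static PetscErrorCode ExampleSetEntry(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       row = 0,col = 0;   /* global indices; may be off-process */
  PetscScalar    v   = 1.0;

  PetscFunctionBegin;
  ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif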
%D",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 PetscInt nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 344 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 345 { 346 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 347 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 348 PetscErrorCode ierr; 349 PetscMPIInt n; 350 PetscInt i,j,rstart,ncols,flg; 351 PetscInt *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. 

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change, then we know that
     no processor disassembled and we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess the b because aij->B was changed */
      b = (Mat_SeqAIJ*)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }

  b->inode.use         = PETSC_FALSE;
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}
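
/*
  Note on the disassembly check above (commentary added for clarity): the
  MPI_PROD reduction of the local was_assembled flags is nonzero only when
  every process is still assembled; a zero product means some process has
  disassembled its off-diagonal block, so every process must disassemble
  before the matrix can be reassembled consistently.
*/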

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory each time and slow things down with
     many mallocs.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
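
/*
  Usage sketch (illustrative, not part of the original file; kept out of the
  build with #if 0): any process may list any global rows in the index set;
  the routine above routes each row to its owner.  Passing a diagonal value
  keeps the zeroed rows nonsingular.  The choice of rows below is an example
  assumption:
*/
#if 0
static PetscErrorCode ExampleZeroFirstLocalRow(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  PetscScalar    diag = 1.0;
  IS             is;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,1,&rstart,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr); /* signature of this PETSc version */
  ierr = ISDestroy(is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif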

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
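
/*
  Note on the two routines above (commentary added for clarity): the scatter
  of ghost values into a->lvec is begun before the diagonal-block multiply and
  completed only afterwards, so the communication needed by the off-diagonal
  block overlaps with the purely local computation on the diagonal block.
*/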

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this
     implementation but may not always hold */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this
     implementation but may not always hold */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
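
/*
  File layout produced above (commentary added for clarity): a four-entry
  header (MAT_FILE_COOKIE, global rows M, global columns N, global nonzero
  count), then every row length, then every column index, then every value;
  process 0 writes its own block first and then receives and writes each
  other process's block in rank order.
*/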

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
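
/*
  Note on the sweeps above (commentary added for clarity): each outer
  iteration folds the off-diagonal contribution into the right-hand side as
  bb1 = bb - B*x by scaling the freshly scattered ghost values by -1 and then
  using B's multadd; the relaxation itself is a purely local sweep on the
  diagonal block, which is why only the SOR_LOCAL_* variants are supported in
  parallel.
*/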

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr     = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr      = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
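
/*
  Usage sketch (illustrative, not part of the original file; kept out of the
  build with #if 0): rows must be local, and each MatGetRow() must be paired
  with MatRestoreRow() before the next row is requested, since the routines
  above share a single work array per matrix.
*/
#if 0
static PetscErrorCode ExampleRowSum(Mat A,PetscInt row,PetscScalar *sum)
{
  PetscErrorCode ierr;
  PetscInt       nz,i,*cols;
  PetscScalar    *vals;

  PetscFunctionBegin;
  *sum = 0.0;
  ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);      /* row must be local */
  for (i=0; i<nz; i++) *sum += vals[i];
  ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);  /* required before the next MatGetRow */
  PetscFunctionReturn(0);
}
#endif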

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
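
/*
  Norms computed above (commentary added for clarity): NORM_FROBENIUS is
  sqrt(sum_ij |a_ij|^2), accumulated locally and combined with MPI_SUM;
  NORM_1 is the maximum column sum, which needs a length-N work array because
  a column's entries are spread over many processes; NORM_INFINITY is the
  maximum row sum, computed locally and combined with MPI_MAX.
*/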

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
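
/*
  Usage sketch (illustrative, not part of the original file; kept out of the
  build with #if 0): the routine above installs a default preallocation as a
  last resort; callers normally supply their own per-row estimates for the
  diagonal and off-diagonal blocks (the 5 and 2 below are example assumptions,
  not recommendations):
*/
#if 0
static PetscErrorCode ExamplePreallocate(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif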
implementation, they aren't compatible for fast copy. */ 1543 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1544 /* because of the column compression in the off-processor part of the matrix a->B, 1545 the number of columns in a->B and b->B may be different, hence we cannot call 1546 MatCopy() directly on the two parts. If need be, we can provide a copy more 1547 efficient than MatCopy_Basic() by first uncompressing the a->B matrices and 1548 then copying the submatrices */ 1549 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1550 } else { 1551 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1552 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1559 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1560 { 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #include "petscblaslapack.h" 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatAXPY_MPIAIJ" 1571 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1572 { 1573 PetscErrorCode ierr; 1574 PetscInt i; 1575 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1576 PetscBLASInt bnz,one=1; 1577 Mat_SeqAIJ *x,*y; 1578 1579 PetscFunctionBegin; 1580 if (str == SAME_NONZERO_PATTERN) { 1581 x = (Mat_SeqAIJ *)xx->A->data; 1582 y = (Mat_SeqAIJ *)yy->A->data; 1583 bnz = (PetscBLASInt)x->nz; 1584 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1585 x = (Mat_SeqAIJ *)xx->B->data; 1586 y = (Mat_SeqAIJ *)yy->B->data; 1587 bnz = (PetscBLASInt)x->nz; 1588 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1589 } else if (str == SUBSET_NONZERO_PATTERN) { 1590 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1591 1592 x = (Mat_SeqAIJ *)xx->B->data; 1593 y = (Mat_SeqAIJ *)yy->B->data; 1594 if (y->xtoy && y->XtoY != xx->B) { 1595 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1596 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1597 } 1598 if (!y->xtoy) { /* get xtoy */ 1599 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1600 y->XtoY = xx->B; 1601 } 1602 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1603 } else { 1604 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1605 } 1606 PetscFunctionReturn(0); 1607 } 1608 1609 /* -------------------------------------------------------------------*/ 1610 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1611 MatGetRow_MPIAIJ, 1612 MatRestoreRow_MPIAIJ, 1613 MatMult_MPIAIJ, 1614 /* 4*/ MatMultAdd_MPIAIJ, 1615 MatMultTranspose_MPIAIJ, 1616 MatMultTransposeAdd_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 /*10*/ 0, 1621 0, 1622 0, 1623 MatRelax_MPIAIJ, 1624 MatTranspose_MPIAIJ, 1625 /*15*/ MatGetInfo_MPIAIJ, 1626 MatEqual_MPIAIJ, 1627 MatGetDiagonal_MPIAIJ, 1628 MatDiagonalScale_MPIAIJ, 1629 MatNorm_MPIAIJ, 1630 /*20*/ MatAssemblyBegin_MPIAIJ, 1631 MatAssemblyEnd_MPIAIJ, 1632 0, 1633 MatSetOption_MPIAIJ, 1634 MatZeroEntries_MPIAIJ, 1635 /*25*/ MatZeroRows_MPIAIJ, 1636 0, 1637 0, 1638 0, 1639 0, 1640 /*30*/ MatSetUpPreallocation_MPIAIJ, 1641 0, 1642 0, 1643 0, 1644 0, 1645 /*35*/ MatDuplicate_MPIAIJ, 1646 0, 1647 0, 1648 0, 1649 0, 1650 /*40*/ MatAXPY_MPIAIJ, 1651 MatGetSubMatrices_MPIAIJ, 1652 MatIncreaseOverlap_MPIAIJ, 1653 MatGetValues_MPIAIJ, 1654 MatCopy_MPIAIJ, 1655 /*45*/ MatPrintHelp_MPIAIJ, 1656 MatScale_MPIAIJ, 1657 0, 1658 0,
1659 0, 1660 /*50*/ MatSetBlockSize_MPIAIJ, 1661 0, 1662 0, 1663 0, 1664 0, 1665 /*55*/ MatFDColoringCreate_MPIAIJ, 1666 0, 1667 MatSetUnfactored_MPIAIJ, 1668 0, 1669 0, 1670 /*60*/ MatGetSubMatrix_MPIAIJ, 1671 MatDestroy_MPIAIJ, 1672 MatView_MPIAIJ, 1673 MatGetPetscMaps_Petsc, 1674 0, 1675 /*65*/ 0, 1676 0, 1677 0, 1678 0, 1679 0, 1680 /*70*/ 0, 1681 0, 1682 MatSetColoring_MPIAIJ, 1683 #if defined(PETSC_HAVE_ADIC) 1684 MatSetValuesAdic_MPIAIJ, 1685 #else 1686 0, 1687 #endif 1688 MatSetValuesAdifor_MPIAIJ, 1689 /*75*/ 0, 1690 0, 1691 0, 1692 0, 1693 0, 1694 /*80*/ 0, 1695 0, 1696 0, 1697 0, 1698 /*84*/ MatLoad_MPIAIJ, 1699 0, 1700 0, 1701 0, 1702 0, 1703 0, 1704 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1705 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1706 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1707 MatPtAP_MPIAIJ_MPIAIJ, 1708 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1709 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1710 0, 1711 0, 1712 0}; 1713 1714 /* ----------------------------------------------------------------------------------------*/ 1715 1716 EXTERN_C_BEGIN 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1719 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1720 { 1721 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1726 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 EXTERN_C_END 1730 1731 EXTERN_C_BEGIN 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1734 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1741 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 EXTERN_C_END 1745 1746 #include "petscpc.h" 1747 EXTERN_C_BEGIN 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1750 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1751 { 1752 Mat_MPIAIJ *b; 1753 PetscErrorCode ierr; 1754 PetscInt i; 1755 1756 PetscFunctionBegin; 1757 B->preallocated = PETSC_TRUE; 1758 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1759 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1760 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1761 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1762 if (d_nnz) { 1763 for (i=0; i<B->m; i++) { 1764 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1765 } 1766 } 1767 if (o_nnz) { 1768 for (i=0; i<B->m; i++) { 1769 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1770 } 1771 } 1772 b = (Mat_MPIAIJ*)B->data; 1773 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1774 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1775 1776 PetscFunctionReturn(0); 1777 } 1778 EXTERN_C_END 1779 1780 /*MC 1781 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1782 1783 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1784 and MATMPIAIJ otherwise. 
As a result, for single process communicators, 1785 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1786 for communicators controlling multiple processes. It is recommended that you call both of 1787 the above preallocation routines for simplicity. 1788 1789 Options Database Keys: 1790 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1791 1792 Level: beginner 1793 1794 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1795 M*/ 1796 1797 EXTERN_C_BEGIN 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "MatCreate_AIJ" 1800 PetscErrorCode MatCreate_AIJ(Mat A) 1801 { 1802 PetscErrorCode ierr; 1803 PetscMPIInt size; 1804 1805 PetscFunctionBegin; 1806 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1807 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1808 if (size == 1) { 1809 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1810 } else { 1811 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1812 } 1813 PetscFunctionReturn(0); 1814 } 1815 EXTERN_C_END 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1819 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1820 { 1821 Mat mat; 1822 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1823 PetscErrorCode ierr; 1824 1825 PetscFunctionBegin; 1826 *newmat = 0; 1827 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1828 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1829 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1830 a = (Mat_MPIAIJ*)mat->data; 1831 1832 mat->factor = matin->factor; 1833 mat->bs = matin->bs; 1834 mat->assembled = PETSC_TRUE; 1835 mat->insertmode = NOT_SET_VALUES; 1836 mat->preallocated = PETSC_TRUE; 1837 1838 a->rstart = oldmat->rstart; 1839 a->rend = oldmat->rend; 1840 a->cstart = oldmat->cstart; 1841 a->cend = oldmat->cend; 1842 a->size = oldmat->size; 1843 a->rank = oldmat->rank; 1844 a->donotstash = oldmat->donotstash; 1845 a->roworiented = oldmat->roworiented; 1846 a->rowindices = 0; 1847 a->rowvalues = 0; 1848 a->getrowactive = PETSC_FALSE; 1849 1850 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1851 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1852 if (oldmat->colmap) { 1853 #if defined (PETSC_USE_CTABLE) 1854 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1855 #else 1856 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1857 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1858 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1859 #endif 1860 } else a->colmap = 0; 1861 if (oldmat->garray) { 1862 PetscInt len; 1863 len = oldmat->B->n; 1864 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1865 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1866 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1867 } else a->garray = 0; 1868 1869 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1870 PetscLogObjectParent(mat,a->lvec); 1871 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1872 PetscLogObjectParent(mat,a->Mvctx); 1873 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1874 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1875 PetscLogObjectParent(mat,a->A); 1876 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1877 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 
PetscLogObjectParent(mat,a->B); 1879 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1880 *newmat = mat; 1881 PetscFunctionReturn(0); 1882 } 1883 1884 #include "petscsys.h" 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatLoad_MPIAIJ" 1888 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1889 { 1890 Mat A; 1891 PetscScalar *vals,*svals; 1892 MPI_Comm comm = ((PetscObject)viewer)->comm; 1893 MPI_Status status; 1894 PetscErrorCode ierr; 1895 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1896 PetscInt i,nz,j,rstart,rend; 1897 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1898 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1899 PetscInt cend,cstart,n,*rowners; 1900 int fd; 1901 1902 PetscFunctionBegin; 1903 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1904 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1905 if (!rank) { 1906 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1907 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1908 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 1909 } 1910 1911 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1912 M = header[1]; N = header[2]; 1913 /* determine ownership of all rows */ 1914 m = M/size + ((M % size) > rank); 1915 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1916 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1917 rowners[0] = 0; 1918 for (i=2; i<=size; i++) { 1919 rowners[i] += rowners[i-1]; 1920 } 1921 rstart = rowners[rank]; 1922 rend = rowners[rank+1]; 1923 1924 /* distribute row lengths to all processors */ 1925 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1926 if (!rank) { 1927 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1928 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1929 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1930 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1931 for (j=0; j<m; j++) { 1932 procsnz[0] += ourlens[j]; 1933 } 1934 for (i=1; i<size; i++) { 1935 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1936 /* calculate the number of nonzeros on each processor */ 1937 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1938 procsnz[i] += rowlengths[j]; 1939 } 1940 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1941 } 1942 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1943 } else { 1944 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1945 } 1946 1947 if (!rank) { 1948 /* determine max buffer needed and allocate it */ 1949 maxnz = 0; 1950 for (i=0; i<size; i++) { 1951 maxnz = PetscMax(maxnz,procsnz[i]); 1952 } 1953 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1954 1955 /* read in my part of the matrix column indices */ 1956 nz = procsnz[0]; 1957 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1958 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1959 1960 /* read in everyone else's and ship off */ 1961 for (i=1; i<size; i++) { 1962 nz = procsnz[i]; 1963 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1964 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1965 } 1966 ierr = PetscFree(cols);CHKERRQ(ierr); 1967 } else { 1968 /* determine buffer space needed for message */ 1969 nz = 0; 1970 for (i=0; i<m; i++) { 1971 nz +=
ourlens[i]; 1972 } 1973 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1974 1975 /* receive message of column indices */ 1976 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1977 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1978 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 1979 } 1980 1981 /* determine column ownership if matrix is not square */ 1982 if (N != M) { 1983 n = N/size + ((N % size) > rank); 1984 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1985 cstart = cend - n; 1986 } else { 1987 cstart = rstart; 1988 cend = rend; 1989 n = cend - cstart; 1990 } 1991 1992 /* loop over local rows, determining number of off diagonal entries */ 1993 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1994 jj = 0; 1995 for (i=0; i<m; i++) { 1996 for (j=0; j<ourlens[i]; j++) { 1997 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1998 jj++; 1999 } 2000 } 2001 2002 /* create our matrix */ 2003 for (i=0; i<m; i++) { 2004 ourlens[i] -= offlens[i]; 2005 } 2006 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2007 ierr = MatSetType(A,type);CHKERRQ(ierr); 2008 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2009 2010 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2011 for (i=0; i<m; i++) { 2012 ourlens[i] += offlens[i]; 2013 } 2014 2015 if (!rank) { 2016 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2017 2018 /* read in my part of the matrix numerical values */ 2019 nz = procsnz[0]; 2020 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2021 2022 /* insert into matrix */ 2023 jj = rstart; 2024 smycols = mycols; 2025 svals = vals; 2026 for (i=0; i<m; i++) { 2027 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2028 smycols += ourlens[i]; 2029 svals += ourlens[i]; 2030 jj++; 2031 } 2032 2033 /* read in the other processors' parts and ship them out */ 2034 for (i=1; i<size; i++) { 2035 nz = procsnz[i]; 2036 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2037 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2038 } 2039 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2040 } else { 2041 /* receive numeric values */ 2042 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2043 2044 /* receive message of values */ 2045 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2046 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2047 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2048 2049 /* insert into matrix */ 2050 jj = rstart; 2051 smycols = mycols; 2052 svals = vals; 2053 for (i=0; i<m; i++) { 2054 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2055 smycols += ourlens[i]; 2056 svals += ourlens[i]; 2057 jj++; 2058 } 2059 } 2060 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2061 ierr = PetscFree(vals);CHKERRQ(ierr); 2062 ierr = PetscFree(mycols);CHKERRQ(ierr); 2063 ierr = PetscFree(rowners);CHKERRQ(ierr); 2064 2065 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 *newmat = A; 2068 PetscFunctionReturn(0); 2069 } 2070 2071 #undef __FUNCT__ 2072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2073 /* 2074 Not great since it makes two copies of the submatrix: first a local SeqAIJ, 2075 and then the end result formed by concatenating the local matrices.
2076 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2077 */ 2078 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2079 { 2080 PetscErrorCode ierr; 2081 PetscMPIInt rank,size; 2082 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2083 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2084 Mat *local,M,Mreuse; 2085 PetscScalar *vwork,*aa; 2086 MPI_Comm comm = mat->comm; 2087 Mat_SeqAIJ *aij; 2088 2089 2090 PetscFunctionBegin; 2091 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2092 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2093 2094 if (call == MAT_REUSE_MATRIX) { 2095 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2096 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2097 local = &Mreuse; 2098 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2099 } else { 2100 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2101 Mreuse = *local; 2102 ierr = PetscFree(local);CHKERRQ(ierr); 2103 } 2104 2105 /* 2106 m - number of local rows 2107 n - number of columns (same on all processors) 2108 rstart - first row in new global matrix generated 2109 */ 2110 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2111 if (call == MAT_INITIAL_MATRIX) { 2112 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2113 ii = aij->i; 2114 jj = aij->j; 2115 2116 /* 2117 Determine the number of non-zeros in the diagonal and off-diagonal 2118 portions of the matrix in order to do correct preallocation 2119 */ 2120 2121 /* first get start and end of "diagonal" columns */ 2122 if (csize == PETSC_DECIDE) { 2123 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2124 if (mglobal == n) { /* square matrix */ 2125 nlocal = m; 2126 } else { 2127 nlocal = n/size + ((n % size) > rank); 2128 } 2129 } else { 2130 nlocal = csize; 2131 } 2132 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2133 rstart = rend - nlocal; 2134 if (rank == size - 1 && rend != n) { 2135 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2136 } 2137 2138 /* next, compute all the lengths */ 2139 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2140 olens = dlens + m; 2141 for (i=0; i<m; i++) { 2142 jend = ii[i+1] - ii[i]; 2143 olen = 0; 2144 dlen = 0; 2145 for (j=0; j<jend; j++) { 2146 if (*jj < rstart || *jj >= rend) olen++; 2147 else dlen++; 2148 jj++; 2149 } 2150 olens[i] = olen; 2151 dlens[i] = dlen; 2152 } 2153 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2154 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2155 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2156 ierr = PetscFree(dlens);CHKERRQ(ierr); 2157 } else { 2158 PetscInt ml,nl; 2159 2160 M = *newmat; 2161 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2162 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2163 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2164 /* 2165 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2166 rather than the slower MatSetValues(). 
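Note: with was_assembled set, MatSetValues_MPIAIJ() translates off-diagonal column indices through the colmap built at the previous assembly (see CreateColmap_MPIAIJ_Private() at the top of this file); since the off-diagonal block stores its columns in compressed form once assembled, the direct call would otherwise misplace those entries.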
2167 */ 2168 M->was_assembled = PETSC_TRUE; 2169 M->assembled = PETSC_FALSE; 2170 } 2171 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2172 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2173 ii = aij->i; 2174 jj = aij->j; 2175 aa = aij->a; 2176 for (i=0; i<m; i++) { 2177 row = rstart + i; 2178 nz = ii[i+1] - ii[i]; 2179 cwork = jj; jj += nz; 2180 vwork = aa; aa += nz; 2181 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2182 } 2183 2184 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2185 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2186 *newmat = M; 2187 2188 /* save submatrix used in processor for next request */ 2189 if (call == MAT_INITIAL_MATRIX) { 2190 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2191 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2192 } 2193 2194 PetscFunctionReturn(0); 2195 } 2196 2197 EXTERN_C_BEGIN 2198 #undef __FUNCT__ 2199 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2200 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2201 { 2202 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2203 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2204 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2205 const PetscInt *JJ; 2206 PetscScalar *values; 2207 PetscErrorCode ierr; 2208 2209 PetscFunctionBegin; 2210 #if defined(PETSC_OPT_g) 2211 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]); 2212 #endif 2213 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2214 o_nnz = d_nnz + m; 2215 2216 for (i=0; i<m; i++) { 2217 nnz = I[i+1] - I[i]; 2218 JJ = J + I[i]; 2219 nnz_max = PetscMax(nnz_max,nnz); 2220 #if defined(PETSC_OPT_g) 2221 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz); 2222 #endif 2223 for (j=0; j<nnz; j++) { 2224 if (*JJ >= cstart) break; 2225 JJ++; 2226 } 2227 d = 0; 2228 for (; j<nnz; j++) { 2229 if (*JJ++ >= cend) break; 2230 d++; 2231 } 2232 d_nnz[i] = d; 2233 o_nnz[i] = nnz - d; 2234 } 2235 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2236 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2237 2238 if (v) values = (PetscScalar*)v; 2239 else { 2240 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2241 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2242 } 2243 2244 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2245 for (i=0; i<m; i++) { 2246 ii = i + rstart; 2247 nnz = I[i+1] - I[i]; 2248 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2249 } 2250 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2252 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2253 2254 if (!v) { 2255 ierr = PetscFree(values);CHKERRQ(ierr); 2256 } 2257 PetscFunctionReturn(0); 2258 } 2259 EXTERN_C_END 2260 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2263 /*@C 2264 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2265 (the default parallel PETSc format). 2266 2267 Collective on MPI_Comm 2268 2269 Input Parameters: 2270 + A - the matrix 2271 . i - the indices into j for the start of each local row (starts with zero) 2272 .
j - the column indices for each local row (starts with zero); these must be sorted for each row 2273 - v - optional values in the matrix 2274 2275 Level: developer 2276 2277 .keywords: matrix, aij, compressed row, sparse, parallel 2278 2279 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2280 @*/ 2281 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2282 { 2283 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2284 2285 PetscFunctionBegin; 2286 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2287 if (f) { 2288 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2295 /*@C 2296 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2297 (the default parallel PETSc format). For good matrix assembly performance 2298 the user should preallocate the matrix storage by setting the parameters 2299 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2300 performance can be increased by more than a factor of 50. 2301 2302 Collective on MPI_Comm 2303 2304 Input Parameters: 2305 + A - the matrix 2306 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2307 (same value is used for all local rows) 2308 . d_nnz - array containing the number of nonzeros in the various rows of the 2309 DIAGONAL portion of the local submatrix (possibly different for each row) 2310 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2311 The size of this array is equal to the number of local rows, i.e. 'm'. 2312 You must leave room for the diagonal entry even if it is zero. 2313 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2314 submatrix (same value is used for all local rows). 2315 - o_nnz - array containing the number of nonzeros in the various rows of the 2316 OFF-DIAGONAL portion of the local submatrix (possibly different for 2317 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2318 structure. The size of this array is equal to the number 2319 of local rows, i.e. 'm'. 2320 2321 If the *_nnz parameter is given then the *_nz parameter is ignored 2322 2323 The AIJ format (also called the Yale sparse matrix format or 2324 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2325 storage. The stored row and column indices begin with zero. See the users manual for details. 2326 2327 The parallel matrix is partitioned such that the first m0 rows belong to 2328 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2329 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 2330 2331 The DIAGONAL portion of the local submatrix of a processor can be defined 2332 as the submatrix which is obtained by extracting the part corresponding 2333 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2334 first row that belongs to the processor, and r2 is the last row belonging 2335 to this processor. This is a square mxm matrix. The remaining portion 2336 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2337 2338 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2339 2340 Example usage: 2341 2342 Consider the following 8x8 matrix with 34 non-zero values that is 2343 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2344 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2345 as follows: 2346 2347 .vb 2348 1 2 0 | 0 3 0 | 0 4 2349 Proc0 0 5 6 | 7 0 0 | 8 0 2350 9 0 10 | 11 0 0 | 12 0 2351 ------------------------------------- 2352 13 0 14 | 15 16 17 | 0 0 2353 Proc1 0 18 0 | 19 20 21 | 0 0 2354 0 0 0 | 22 23 0 | 24 0 2355 ------------------------------------- 2356 Proc2 25 26 27 | 0 0 28 | 29 0 2357 30 0 0 | 31 32 33 | 0 34 2358 .ve 2359 2360 This can be represented as a collection of submatrices as: 2361 2362 .vb 2363 A B C 2364 D E F 2365 G H I 2366 .ve 2367 2368 Here the submatrices A,B,C are owned by proc0, D,E,F are 2369 owned by proc1, and G,H,I are owned by proc2. 2370 2371 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2372 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2373 The 'M','N' parameters are 8,8, and have the same values on all procs. 2374 2375 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2376 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2377 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2378 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2379 part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ 2380 matrix, and [DF] as another SeqAIJ matrix. 2381 2382 When d_nz, o_nz parameters are specified, d_nz storage elements are 2383 allocated for every row of the local diagonal submatrix, and o_nz 2384 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2385 One way to choose d_nz and o_nz is to use the maximum number of nonzeros 2386 per local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2387 In this case, the values of d_nz,o_nz are: 2388 .vb 2389 proc0 : d_nz = 2, o_nz = 2 2390 proc1 : d_nz = 3, o_nz = 2 2391 proc2 : d_nz = 1, o_nz = 4 2392 .ve 2393 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2394 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2395 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2396 34 values. 2397 2398 When d_nnz, o_nnz parameters are specified, the storage is specified 2399 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2400 In the above case the values for d_nnz,o_nnz are: 2401 .vb 2402 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2403 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2404 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2405 .ve 2406 Here the space allocated is the sum of all the above values, i.e. 34, and 2407 hence the pre-allocation is perfect.
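   A sketch of the corresponding call on proc0 (assuming A has already been
   created as a parallel AIJ matrix with 3 local rows as above; the variable
   names are illustrative only):
.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   The d_nz and o_nz arguments are passed as 0 here because they are ignored
   once the *_nnz arrays are supplied.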
2408 2409 Level: intermediate 2410 2411 .keywords: matrix, aij, compressed row, sparse, parallel 2412 2413 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2414 MPIAIJ 2415 @*/ 2416 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2417 { 2418 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2419 2420 PetscFunctionBegin; 2421 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2422 if (f) { 2423 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2424 } 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatCreateMPIAIJ" 2430 /*@C 2431 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2432 (the default parallel PETSc format). For good matrix assembly performance 2433 the user should preallocate the matrix storage by setting the parameters 2434 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2435 performance can be increased by more than a factor of 50. 2436 2437 Collective on MPI_Comm 2438 2439 Input Parameters: 2440 + comm - MPI communicator 2441 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2442 This value should be the same as the local size used in creating the 2443 y vector for the matrix-vector product y = Ax. 2444 . n - This value should be the same as the local size used in creating the 2445 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 2446 calculated if N is given). For square matrices n is almost always m. 2447 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2448 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2449 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2450 (same value is used for all local rows) 2451 . d_nnz - array containing the number of nonzeros in the various rows of the 2452 DIAGONAL portion of the local submatrix (possibly different for each row) 2453 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2454 The size of this array is equal to the number of local rows, i.e. 'm'. 2455 You must leave room for the diagonal entry even if it is zero. 2456 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2457 submatrix (same value is used for all local rows). 2458 - o_nnz - array containing the number of nonzeros in the various rows of the 2459 OFF-DIAGONAL portion of the local submatrix (possibly different for 2460 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2461 structure. The size of this array is equal to the number 2462 of local rows, i.e. 'm'. 2463 2464 Output Parameter: 2465 . A - the matrix 2466 2467 Notes: 2468 If the *_nnz parameter is given then the *_nz parameter is ignored 2469 2470 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2471 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2472 storage requirements for this matrix. 2473 2474 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2475 processor then it must be used on all processors that share the object for 2476 that argument. 2477 2478 The user MUST specify either the local or global matrix dimensions 2479 (possibly both).
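   For example, a sketch of a typical call, distributing the matrix by rows
   and letting PETSc compute the global sizes (the nonzero estimates 5 and 2
   are illustrative only):
.vb
      Mat A;
      ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve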
2480 2481 The parallel matrix is partitioned such that the first m0 rows belong to 2482 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2483 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 2484 2485 The DIAGONAL portion of the local submatrix of a processor can be defined 2486 as the submatrix which is obtained by extracting the part corresponding 2487 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2488 first row that belongs to the processor, and r2 is the last row belonging 2489 to this processor. This is a square mxm matrix. The remaining portion 2490 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2491 2492 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2493 2494 When calling this routine with a single process communicator, a matrix of 2495 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2496 type of communicator, use the construction mechanism: 2497 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2498 2499 By default, this format uses inodes (identical nodes) when possible. 2500 We search for consecutive rows with the same nonzero structure, thereby 2501 reusing matrix information to achieve increased efficiency. 2502 2503 Options Database Keys: 2504 + -mat_aij_no_inode - Do not use inodes 2505 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2506 - -mat_aij_oneindex - Internally use indexing starting at 1 2507 rather than 0. Note that when calling MatSetValues(), 2508 the user still MUST index entries starting at 0! 2509 2510 2511 Example usage: 2512 2513 Consider the following 8x8 matrix with 34 non-zero values that is 2514 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2515 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2516 as follows: 2517 2518 .vb 2519 1 2 0 | 0 3 0 | 0 4 2520 Proc0 0 5 6 | 7 0 0 | 8 0 2521 9 0 10 | 11 0 0 | 12 0 2522 ------------------------------------- 2523 13 0 14 | 15 16 17 | 0 0 2524 Proc1 0 18 0 | 19 20 21 | 0 0 2525 0 0 0 | 22 23 0 | 24 0 2526 ------------------------------------- 2527 Proc2 25 26 27 | 0 0 28 | 29 0 2528 30 0 0 | 31 32 33 | 0 34 2529 .ve 2530 2531 This can be represented as a collection of submatrices as: 2532 2533 .vb 2534 A B C 2535 D E F 2536 G H I 2537 .ve 2538 2539 Here the submatrices A,B,C are owned by proc0, D,E,F are 2540 owned by proc1, and G,H,I are owned by proc2. 2541 2542 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2543 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2544 The 'M','N' parameters are 8,8, and have the same values on all procs. 2545 2546 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2547 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2548 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2549 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2550 part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ 2551 matrix, and [DF] as another SeqAIJ matrix. 2552 2553 When d_nz, o_nz parameters are specified, d_nz storage elements are 2554 allocated for every row of the local diagonal submatrix, and o_nz 2555 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2556 One way to choose d_nz and o_nz is to use the maximum number of nonzeros 2557 per local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2558 In this case, the values of d_nz,o_nz are: 2559 .vb 2560 proc0 : d_nz = 2, o_nz = 2 2561 proc1 : d_nz = 3, o_nz = 2 2562 proc2 : d_nz = 1, o_nz = 4 2563 .ve 2564 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2565 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2566 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2567 34 values. 2568 2569 When d_nnz, o_nnz parameters are specified, the storage is specified 2570 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2571 In the above case the values for d_nnz,o_nnz are: 2572 .vb 2573 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2574 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2575 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2576 .ve 2577 Here the space allocated is the sum of all the above values, i.e. 34, and 2578 hence the pre-allocation is perfect. 2579 2580 Level: intermediate 2581 2582 .keywords: matrix, aij, compressed row, sparse, parallel 2583 2584 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2585 MPIAIJ 2586 @*/ 2587 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2588 { 2589 PetscErrorCode ierr; 2590 PetscMPIInt size; 2591 2592 PetscFunctionBegin; 2593 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2594 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2595 if (size > 1) { 2596 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2597 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2598 } else { 2599 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2600 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2601 } 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2608 { 2609 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2610 2611 PetscFunctionBegin; 2612 *Ad = a->A; 2613 *Ao = a->B; 2614 *colmap = a->garray; 2615 PetscFunctionReturn(0); 2616 } 2617 2618 #undef __FUNCT__ 2619 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2620 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2621 { 2622 PetscErrorCode ierr; 2623 PetscInt i; 2624 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2625 2626 PetscFunctionBegin; 2627 if (coloring->ctype == IS_COLORING_LOCAL) { 2628 ISColoringValue *allcolors,*colors; 2629 ISColoring ocoloring; 2630 2631 /* set coloring for diagonal portion */ 2632 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2633 2634 /* set coloring for off-diagonal portion */ 2635 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2636 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2637 for (i=0; i<a->B->n; i++) { 2638 colors[i] = allcolors[a->garray[i]]; 2639 } 2640 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2641 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2642 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2643 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2644 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2645 ISColoringValue *colors; 2646 PetscInt *larray; 2647 ISColoring ocoloring; 2648 2649 /* set coloring for diagonal portion */ 2650 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2651 for (i=0; i<a->A->n; i++) { 2652 larray[i] = i +
a->cstart; 2653 } 2654 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2655 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2656 for (i=0; i<a->A->n; i++) { 2657 colors[i] = coloring->colors[larray[i]]; 2658 } 2659 ierr = PetscFree(larray);CHKERRQ(ierr); 2660 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2661 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2662 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2663 2664 /* set coloring for off-diagonal portion */ 2665 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2666 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2667 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2668 for (i=0; i<a->B->n; i++) { 2669 colors[i] = coloring->colors[larray[i]]; 2670 } 2671 ierr = PetscFree(larray);CHKERRQ(ierr); 2672 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2673 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2674 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2675 } else { 2676 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 2677 } 2678 2679 PetscFunctionReturn(0); 2680 } 2681 2682 #if defined(PETSC_HAVE_ADIC) 2683 #undef __FUNCT__ 2684 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2685 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2686 { 2687 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2688 PetscErrorCode ierr; 2689 2690 PetscFunctionBegin; 2691 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2692 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2693 PetscFunctionReturn(0); 2694 } 2695 #endif 2696 2697 #undef __FUNCT__ 2698 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2699 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2700 { 2701 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2702 PetscErrorCode ierr; 2703 2704 PetscFunctionBegin; 2705 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2706 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2707 PetscFunctionReturn(0); 2708 } 2709 2710 #undef __FUNCT__ 2711 #define __FUNCT__ "MatMerge" 2712 /*@C 2713 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2714 matrices from each processor 2715 2716 Collective on MPI_Comm 2717 2718 Input Parameters: 2719 + comm - the communicator the parallel matrix will live on 2720 . inmat - the input sequential matrix (one per process) 2721 . n - number of local columns (or PETSC_DECIDE) 2722 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2723 2724 Output Parameter: 2725 . outmat - the parallel matrix generated 2726 2727 Level: advanced 2728 2729 Notes: The number of columns of the matrix in EACH processor MUST be the same.
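   A sketch of typical usage, where each process supplies one sequential
   matrix (all with the same number of columns):
.vb
      Mat inmat;        /* the local sequential matrix on this process */
      Mat outmat;
      ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
   Note that, as the implementation below shows, inmat is destroyed in the
   course of forming outmat.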
2730 2731 @*/ 2732 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2733 { 2734 PetscErrorCode ierr; 2735 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2736 PetscInt *indx; 2737 PetscScalar *values; 2738 PetscMap columnmap,rowmap; 2739 2740 PetscFunctionBegin; 2741 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2742 /* 2743 PetscMPIInt rank; 2744 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2745 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2746 */ 2747 if (scall == MAT_INITIAL_MATRIX){ 2748 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2749 if (n == PETSC_DECIDE){ 2750 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2751 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2752 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2753 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2754 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2755 } 2756 2757 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2758 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2759 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2760 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2761 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2762 2763 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2764 for (i=0;i<m;i++) { 2765 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2766 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2767 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2768 } 2769 /* This routine will ONLY return an MPIAIJ type matrix */ 2770 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2771 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2772 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2773 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2774 2775 } else if (scall == MAT_REUSE_MATRIX){ 2776 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2777 } else { 2778 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2779 } 2780 2781 for (i=0;i<m;i++) { 2782 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2783 I = i + rstart; 2784 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2785 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2786 } 2787 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2788 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2790 2791 PetscFunctionReturn(0); 2792 } 2793 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatFileSplit" 2796 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2797 { 2798 PetscErrorCode ierr; 2799 PetscMPIInt rank; 2800 PetscInt m,N,i,rstart,nnz; 2801 size_t len; 2802 const PetscInt *indx; 2803 PetscViewer out; 2804 char *name; 2805 Mat B; 2806 const PetscScalar *values; 2807 2808 PetscFunctionBegin; 2809 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2810 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2811 /* Should this be the type of the diagonal block of A?
*/ 2812 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2813 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2814 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2815 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2816 for (i=0;i<m;i++) { 2817 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2818 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2819 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2820 } 2821 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2822 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2823 2824 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2825 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2826 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2827 sprintf(name,"%s.%d",outfile,rank); 2828 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2829 ierr = PetscFree(name);CHKERRQ(ierr); 2830 ierr = MatView(B,out);CHKERRQ(ierr); 2831 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2832 ierr = MatDestroy(B);CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 2836 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2839 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2840 { 2841 PetscErrorCode ierr; 2842 Mat_Merge_SeqsToMPI *merge; 2843 PetscObjectContainer container; 2844 2845 PetscFunctionBegin; 2846 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2847 if (container) { 2848 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2849 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2850 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2851 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2852 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2853 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2854 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2855 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2856 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2857 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2858 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2859 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2860 2861 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2862 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2863 /* merge is only valid when the container was found, so it must be freed inside this check */ 2864 ierr = PetscFree(merge);CHKERRQ(ierr); 2865 } 2866 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2867 PetscFunctionReturn(0); 2868 } 2869 2870 #include "src/mat/utils/freespace.h" 2871 #include "petscbt.h" 2872 #undef __FUNCT__ 2873 #define __FUNCT__ "MatMerge_SeqsToMPI" 2874 /*@C 2875 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential 2876 matrices from each processor 2877 2878 Collective on MPI_Comm 2879 2880 Input Parameters: 2881 + comm - the communicator the parallel matrix will live on 2882 . seqmat - the input sequential matrix (one per process) 2883 . m - number of local rows (or PETSC_DECIDE) 2884 . n - number of local columns (or PETSC_DECIDE) 2885 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2886 2887 Output Parameter: 2888 . mpimat - the parallel matrix generated 2889 2890 Level: advanced 2891 2892 Notes: 2893 The dimensions of the sequential matrix in each processor MUST be the same. 2894 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 2895 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
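   The merge is carried out in two phases; a sketch of how the symbolic and
   numeric entry points defined below fit together:
.vb
      Mat mpimat;
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve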
2896 @*/ 2897 static PetscEvent logkey_seqstompinum = 0; #undef __FUNCT__ #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2898 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2899 { 2900 PetscErrorCode ierr; 2901 MPI_Comm comm=mpimat->comm; 2902 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2903 PetscMPIInt size,rank,taga,*len_s; 2904 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2905 PetscInt proc,m; 2906 PetscInt **buf_ri,**buf_rj; 2907 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2908 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2909 MPI_Request *s_waits,*r_waits; 2910 MPI_Status *status; 2911 MatScalar *aa=a->a,**abuf_r,*ba_i; 2912 Mat_Merge_SeqsToMPI *merge; 2913 PetscObjectContainer container; 2914 2915 PetscFunctionBegin; 2916 if (!logkey_seqstompinum) { 2917 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr); 2918 } 2919 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2920 2921 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2922 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2923 2924 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2925 if (container) { 2926 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2927 } else { SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()"); } 2928 bi = merge->bi; 2929 bj = merge->bj; 2930 buf_ri = merge->buf_ri; 2931 buf_rj = merge->buf_rj; 2932 2933 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2934 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2935 len_s = merge->len_s; 2936 2937 /* send and recv matrix values */ 2938 /*-----------------------------*/ 2939 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2940 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2941 2942 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2943 for (proc=0,k=0; proc<size; proc++){ 2944 if (!len_s[proc]) continue; 2945 i = owners[proc]; 2946 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2947 k++; 2948 } 2949 2950 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2951 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2952 ierr = PetscFree(status);CHKERRQ(ierr); 2953 2954 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2955 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2956 2957 /* insert mat values of mpimat */ 2958 /*----------------------------*/ 2959 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2960 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2961 nextrow = buf_ri_k + merge->nrecv; 2962 nextai = nextrow + merge->nrecv; 2963 2964 for (k=0; k<merge->nrecv; k++){ 2965 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 2966 nrows = *(buf_ri_k[k]); 2967 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 2968 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */ 2969 } 2970 2971 /* set values of ba */ 2972 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2973 for (i=0; i<m; i++) { 2974 arow = owners[rank] + i; 2975 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2976 bnzi = bi[i+1] - bi[i]; 2977 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2978 2979 /* add local non-zero vals of this proc's seqmat into ba */ 2980 anzi = ai[arow+1] - ai[arow]; 2981 aj = a->j + ai[arow]; 2982 aa = a->a + ai[arow]; 2983 nextaj = 0; 2984 for
(j=0; nextaj<anzi; j++){ 2985 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2986 ba_i[j] += aa[nextaj++]; 2987 } 2988 } 2989 2990 /* add received vals into ba */ 2991 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2992 /* i-th row */ 2993 if (i == *nextrow[k]) { 2994 anzi = *(nextai[k]+1) - *nextai[k]; 2995 aj = buf_rj[k] + *(nextai[k]); 2996 aa = abuf_r[k] + *(nextai[k]); 2997 nextaj = 0; 2998 for (j=0; nextaj<anzi; j++){ 2999 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3000 ba_i[j] += aa[nextaj++]; 3001 } 3002 } 3003 nextrow[k]++; nextai[k]++; 3004 } 3005 } 3006 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3007 } 3008 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3009 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3010 3011 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3012 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3013 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3014 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3015 PetscFunctionReturn(0); 3016 } 3017 static PetscEvent logkey_seqstompisym = 0; #undef __FUNCT__ #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3018 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3019 { 3020 PetscErrorCode ierr; 3021 Mat B_mpi; 3022 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3023 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3024 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3025 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3026 PetscInt len,proc,*dnz,*onz; 3027 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3028 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3029 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3030 MPI_Status *status; 3031 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3032 PetscBT lnkbt; 3033 Mat_Merge_SeqsToMPI *merge; 3034 PetscObjectContainer container; 3035 3036 PetscFunctionBegin; 3037 if (!logkey_seqstompisym) { 3038 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr); 3039 } 3040 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3041 3042 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3043 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3044 3045 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3046 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3047 3048 /* determine row ownership */ 3049 /*---------------------------------------------------------*/ 3050 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3051 if (m == PETSC_DECIDE) { 3052 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3053 } else { 3054 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3055 } 3056 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3057 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3058 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3059 3060 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3061 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3062 3063 /* determine the number of messages to send and their lengths */ 3064 /*---------------------------------------------------------*/ 3065 len_s = merge->len_s; 3066 3067 len = 0; /* length of buf_si[] */ 3068 merge->nsend = 0; 3069 for (proc=0; proc<size; proc++){ 3070 len_si[proc] = 0; 3071 if (proc == rank){ 3072 len_s[proc] = 0; 3073 } else { 3074 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3075
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (local to the receiving proc)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  if (n==PETSC_DECIDE) {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
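/*
   Example of the intended calling sequence (a minimal sketch, not taken from
   the PETSc test suite; Aseq stands for a per-process MATSEQAIJ matrix whose
   columns span the global column space):

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... change the numerical values of Aseq, keeping its nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only MatMerge_SeqsToMPINumeric() runs, reusing the
   Mat_Merge_SeqsToMPI structure composed on C by the symbolic phase.
*/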
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
     MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar    *aa=a->a,*ba=b->a,*ca;
  PetscInt       am=A->m,i,j,k,cstart=mpimat->cstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: global cols < cstart */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining global cols */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat           = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->freedata = PETSC_TRUE;
    mat->nonew    = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A: global cols < cstart */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A: remaining global cols */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
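/*
   Example (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... A's values change, nonzero pattern unchanged ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);

   In the MAT_REUSE_MATRIX branch only the a-array of A_loc is refilled;
   the i- and j-arrays built by the first call are kept.
*/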
static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and the NON-ZERO columns of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = a->rstart; end = a->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
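/*
   Example (a minimal sketch; passing PETSC_NULL for row and col selects all
   local rows and the nonzero columns of A, which is what the code above
   constructs by default):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ...
     ierr = MatGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/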
static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq at which B's own local rows begin (the following B->m rows of B_seq are taken from B's local rows)
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
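/*
   Example (a minimal sketch; A and B are MATMPIAIJ with A's column layout
   matching B's row layout, as the size check above requires; the index sets
   are retained so the second call can reuse them):

     Mat      B_seq;
     IS       rowb,colb;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... new values in B, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/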
static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=ctx->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
  PetscInt               i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             *sstatus,rstatus;
  PetscInt               *cols;
  PetscScalar            *vals;
  PetscMPIInt            j;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
  svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;
  rwaits   = gen_from->requests;
  swaits   = gen_to->requests;
  rrow     = gen_from->indices; /* local row indices to be received */
  srow     = gen_to->indices;   /* local row indices to be sent */
  rstarts  = gen_from->starts;
  sstarts  = gen_to->starts;
  rprocs   = gen_from->procs;
  sprocs   = gen_to->procs;
  sstatus  = gen_to->sstatus;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj    = sstartsj + nsends +1;
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i];
      nrows  = sstarts[i+1]-sstarts[i]; /* num of rows */
      for (j=0; j<nrows; j++) {
        row  = srow[k] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* row length */
        len += rowlen[j];
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {
      ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
    }
    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {
      ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
    }
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends +1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i];
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + b->rowners[rank]; /* global row idx */
      ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      for (l=0; l<ncols; l++){
        *bufA++ = vals[l];
      }
      ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
  }

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth           = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->freedata = PETSC_TRUE;
    b_oth->nonew    = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr    = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->bs        = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash = PETSC_FALSE;
  b->colmap     = 0;
  b->garray     = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
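/*
   Example of reaching MatCreate_MPIAIJ() through the public API (a minimal
   sketch; M and N stand for the global dimensions, and the preallocation
   estimates 5 and 2 are placeholders the user must choose):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); /* invokes MatCreate_MPIAIJ() */
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/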