1 2 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 3 4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int); 7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []); 9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode); 10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 PetscErrorCode ierr; 43 int i; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 52 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 53 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 55 56 for (i=0; i<A->m; i++){ 57 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 58 } 59 60 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 61 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 63 64 PetscFunctionReturn(0); 65 } 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 int nbs = B->nbs,i,bs=B->bs,ierr; 110 111 PetscFunctionBegin; 112 #if defined (PETSC_USE_CTABLE) 113 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 114 for (i=0; i<nbs; i++){ 115 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 116 } 117 #else 118 ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 119 PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 120 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 121 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 122 #endif 123 PetscFunctionReturn(0); 124 } 125 126 #define CHUNKSIZE 10 127 128 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 129 { \ 130 \ 131 brow = row/bs; \ 132 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 133 rmax = aimax[brow]; nrow = ailen[brow]; \ 134 bcol = col/bs; \ 135 ridx = row % bs; cidx = col % bs; \ 136 low = 0; high = nrow; \ 137 while (high-low > 3) { \ 138 t = (low+high)/2; \ 139 if (rp[t] > bcol) high = t; \ 140 else low = t; \ 141 } \ 142 for (_i=low; _i<high; _i++) { \ 143 if (rp[_i] > bcol) break; \ 144 if (rp[_i] == bcol) { \ 145 bap = ap + bs2*_i + bs*cidx + ridx; \ 146 if (addv == ADD_VALUES) *bap += value; \ 147 else *bap = value; \ 148 goto a_noinsert; \ 149 } \ 150 } \ 151 if (a->nonew == 1) goto a_noinsert; \ 152 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 153 if (nrow >= rmax) { \ 154 /* there is no extra room in row, therefore enlarge */ \ 155 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 156 MatScalar *new_a; \ 157 \ 158 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 159 \ 160 /* malloc new storage space */ \ 161 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 162 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 163 new_j = (int*)(new_a + bs2*new_nz); \ 164 new_i = new_j + new_nz; \ 165 \ 166 /* copy over old data into new slots */ \ 167 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 168 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 169 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 170 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 171 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 172 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 173 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 175 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 176 /* free up old matrix storage */ \ 177 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 178 if (!a->singlemalloc) { \ 179 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 180 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 181 } \ 182 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 183 a->singlemalloc = PETSC_TRUE; \ 184 \ 185 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 186 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 187 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 188 a->maxnz += bs2*CHUNKSIZE; \ 189 a->reallocs++; \ 190 a->nz++; \ 191 } \ 192 N = nrow++ - 1; \ 193 /* shift up all the later entries in this row */ \ 194 for (ii=N; ii>=_i; ii--) { \ 195 rp[ii+1] = rp[ii]; \ 196 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 197 } \ 198 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 199 rp[_i] = bcol; \ 200 ap[bs2*_i + bs*cidx + ridx] = value; \ 201 a_noinsert:; \ 202 ailen[brow] = nrow; \ 203 } 204 205 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 206 { \ 207 brow = row/bs; \ 208 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 209 rmax = bimax[brow]; nrow = bilen[brow]; \ 210 bcol = col/bs; \ 211 ridx = row % bs; cidx = col % bs; \ 212 low = 0; high = nrow; \ 213 while (high-low > 3) { \ 214 t = (low+high)/2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i=low; _i<high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2*_i + bs*cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto b_noinsert; \ 225 } \ 226 } \ 227 if (b->nonew == 1) goto b_noinsert; \ 228 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 229 if (nrow >= rmax) { \ 230 /* there is no extra room in row, therefore enlarge */ \ 231 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 232 MatScalar *new_a; \ 233 \ 234 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 235 \ 236 /* malloc new storage space */ \ 237 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 238 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 239 new_j = (int*)(new_a + bs2*new_nz); \ 240 new_i = new_j + new_nz; \ 241 \ 242 /* copy over old data into new slots */ \ 243 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 244 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 245 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 246 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 247 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 248 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 249 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 251 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 252 /* free up old matrix storage */ \ 253 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 254 if (!b->singlemalloc) { \ 255 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 256 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 257 } \ 258 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 259 b->singlemalloc = PETSC_TRUE; \ 260 \ 261 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 262 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 263 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 264 b->maxnz += bs2*CHUNKSIZE; \ 265 b->reallocs++; \ 266 b->nz++; \ 267 } \ 268 N = nrow++ - 1; \ 269 /* shift up all the later entries in this row */ \ 270 for (ii=N; ii>=_i; ii--) { \ 271 rp[ii+1] = rp[ii]; \ 272 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 273 } \ 274 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 275 rp[_i] = bcol; \ 276 ap[bs2*_i + bs*cidx + ridx] = value; \ 277 b_noinsert:; \ 278 bilen[brow] = nrow; \ 279 } 280 281 #if defined(PETSC_USE_MAT_SINGLE) 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatSetValues_MPIBAIJ" 284 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 285 { 286 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 287 PetscErrorCode ierr; 288 int i,N = m*n; 289 MatScalar *vsingle; 290 291 PetscFunctionBegin; 292 if (N > b->setvalueslen) { 293 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 294 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 295 b->setvalueslen = N; 296 } 297 vsingle = b->setvaluescopy; 298 299 for (i=0; i<N; i++) { 300 vsingle[i] = v[i]; 301 } 302 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 308 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 309 { 310 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 311 PetscErrorCode ierr; 312 int i,N = m*n*b->bs2; 313 MatScalar *vsingle; 314 315 PetscFunctionBegin; 316 if (N > b->setvalueslen) { 317 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 318 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 319 b->setvalueslen = N; 320 } 321 vsingle = b->setvaluescopy; 322 for (i=0; i<N; i++) { 323 vsingle[i] = v[i]; 324 } 325 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 331 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 332 { 333 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 334 PetscErrorCode ierr; 335 int i,N = m*n; 336 MatScalar *vsingle; 337 338 PetscFunctionBegin; 339 if (N > b->setvalueslen) { 340 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 341 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 342 b->setvalueslen = N; 343 } 344 vsingle = b->setvaluescopy; 345 for (i=0; i<N; i++) { 346 vsingle[i] = v[i]; 347 } 348 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 354 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 355 { 356 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 357 PetscErrorCode ierr; 358 int i,N = m*n*b->bs2; 359 MatScalar *vsingle; 360 361 PetscFunctionBegin; 362 if (N > b->setvalueslen) { 363 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 364 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 365 b->setvalueslen = N; 366 } 367 vsingle = b->setvaluescopy; 368 for (i=0; i<N; i++) { 369 vsingle[i] = v[i]; 370 } 371 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 #endif 375 376 #undef __FUNCT__ 377 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 378 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 379 { 380 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 381 MatScalar value; 382 PetscTruth roworiented = baij->roworiented; 383 PetscErrorCode ierr; 384 int i,j,row,col; 385 int rstart_orig=baij->rstart_bs; 386 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 387 int cend_orig=baij->cend_bs,bs=baij->bs; 388 389 /* Some Variables required in the macro */ 390 Mat A = baij->A; 391 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 392 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 393 MatScalar *aa=a->a; 394 395 Mat B = baij->B; 396 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 397 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 398 MatScalar *ba=b->a; 399 400 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 401 int low,high,t,ridx,cidx,bs2=a->bs2; 402 MatScalar *ap,*bap; 403 404 PetscFunctionBegin; 405 for (i=0; i<m; i++) { 406 if (im[i] < 0) continue; 407 #if defined(PETSC_USE_BOPT_g) 408 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 409 #endif 410 if (im[i] >= rstart_orig && im[i] < rend_orig) { 411 row = im[i] - rstart_orig; 412 for (j=0; j<n; j++) { 413 if (in[j] >= cstart_orig && in[j] < cend_orig){ 414 col = in[j] - cstart_orig; 415 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 416 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 417 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 418 } else if (in[j] < 0) continue; 419 #if defined(PETSC_USE_BOPT_g) 420 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);} 421 #endif 422 else { 423 if (mat->was_assembled) { 424 if (!baij->colmap) { 425 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 426 } 427 #if defined (PETSC_USE_CTABLE) 428 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 429 col = col - 1; 430 #else 431 col = baij->colmap[in[j]/bs] - 1; 432 #endif 433 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 434 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 435 col = in[j]; 436 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 437 B = baij->B; 438 b = (Mat_SeqBAIJ*)(B)->data; 439 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 440 ba=b->a; 441 } else col += in[j]%bs; 442 } else col = in[j]; 443 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 444 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 445 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 446 } 447 } 448 } else { 449 if (!baij->donotstash) { 450 if (roworiented) { 451 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 452 } else { 453 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 454 } 455 } 456 } 457 } 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 464 { 465 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 466 const MatScalar *value; 467 MatScalar *barray=baij->barray; 468 PetscTruth roworiented = baij->roworiented; 469 PetscErrorCode ierr; 470 int i,j,ii,jj,row,col,rstart=baij->rstart; 471 int rend=baij->rend,cstart=baij->cstart,stepval; 472 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 473 474 PetscFunctionBegin; 475 if(!barray) { 476 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 477 baij->barray = barray; 478 } 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 if (im[i] < 0) continue; 487 #if defined(PETSC_USE_BOPT_g) 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 489 #endif 490 if (im[i] >= rstart && im[i] < rend) { 491 row = im[i] - rstart; 492 for (j=0; j<n; j++) { 493 /* If NumCol = 1 then a copy is not required */ 494 if ((roworiented) && (n == 1)) { 495 barray = (MatScalar*)v + i*bs2; 496 } else if((!roworiented) && (m == 1)) { 497 barray = (MatScalar*)v + j*bs2; 498 } else { /* Here a copy is required */ 499 if (roworiented) { 500 value = v + i*(stepval+bs)*bs + j*bs; 501 } else { 502 value = v + j*(stepval+bs)*bs + i*bs; 503 } 504 for (ii=0; ii<bs; ii++,value+=stepval) { 505 for (jj=0; jj<bs; jj++) { 506 *barray++ = *value++; 507 } 508 } 509 barray -=bs2; 510 } 511 512 if (in[j] >= cstart && in[j] < cend){ 513 col = in[j] - cstart; 514 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 515 } 516 else if (in[j] < 0) continue; 517 #if defined(PETSC_USE_BOPT_g) 518 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 519 #endif 520 else { 521 if (mat->was_assembled) { 522 if (!baij->colmap) { 523 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 524 } 525 526 #if defined(PETSC_USE_BOPT_g) 527 #if defined (PETSC_USE_CTABLE) 528 { int data; 529 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 530 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 531 } 532 #else 533 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 534 #endif 535 #endif 536 #if defined (PETSC_USE_CTABLE) 537 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 538 col = (col - 1)/bs; 539 #else 540 col = (baij->colmap[in[j]] - 1)/bs; 541 #endif 542 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 543 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 544 col = in[j]; 545 } 546 } 547 else col = in[j]; 548 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 549 } 550 } 551 } else { 552 if (!baij->donotstash) { 553 if (roworiented) { 554 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 555 } else { 556 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 557 } 558 } 559 } 560 } 561 PetscFunctionReturn(0); 562 } 563 564 #define HASH_KEY 0.6180339887 565 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 566 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 567 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 570 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 571 { 572 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 573 PetscTruth roworiented = baij->roworiented; 574 PetscErrorCode ierr; 575 int i,j,row,col; 576 int rstart_orig=baij->rstart_bs; 577 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 578 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 579 PetscReal tmp; 580 MatScalar **HD = baij->hd,value; 581 #if defined(PETSC_USE_BOPT_g) 582 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 583 #endif 584 585 PetscFunctionBegin; 586 587 for (i=0; i<m; i++) { 588 #if defined(PETSC_USE_BOPT_g) 589 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 590 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 591 #endif 592 row = im[i]; 593 if (row >= rstart_orig && row < rend_orig) { 594 for (j=0; j<n; j++) { 595 col = in[j]; 596 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 597 /* Look up into the Hash Table */ 598 key = (row/bs)*Nbs+(col/bs)+1; 599 h1 = HASH(size,key,tmp); 600 601 602 idx = h1; 603 #if defined(PETSC_USE_BOPT_g) 604 insert_ct++; 605 total_ct++; 606 if (HT[idx] != key) { 607 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 608 if (idx == size) { 609 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 610 if (idx == h1) { 611 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 612 } 613 } 614 } 615 #else 616 if (HT[idx] != key) { 617 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 618 if (idx == size) { 619 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 620 if (idx == h1) { 621 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 622 } 623 } 624 } 625 #endif 626 /* A HASH table entry is found, so insert the values at the correct address */ 627 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 628 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 629 } 630 } else { 631 if (!baij->donotstash) { 632 if (roworiented) { 633 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 634 } else { 635 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 636 } 637 } 638 } 639 } 640 #if defined(PETSC_USE_BOPT_g) 641 baij->ht_total_ct = total_ct; 642 baij->ht_insert_ct = insert_ct; 643 #endif 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNCT__ 648 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 649 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 650 { 651 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 652 PetscTruth roworiented = baij->roworiented; 653 PetscErrorCode ierr; 654 int i,j,ii,jj,row,col; 655 int rstart=baij->rstart ; 656 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 657 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 658 PetscReal tmp; 659 MatScalar **HD = baij->hd,*baij_a; 660 const MatScalar *v_t,*value; 661 #if defined(PETSC_USE_BOPT_g) 662 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 663 #endif 664 665 PetscFunctionBegin; 666 667 if (roworiented) { 668 stepval = (n-1)*bs; 669 } else { 670 stepval = (m-1)*bs; 671 } 672 for (i=0; i<m; i++) { 673 #if defined(PETSC_USE_BOPT_g) 674 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]); 675 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1); 676 #endif 677 row = im[i]; 678 v_t = v + i*bs2; 679 if (row >= rstart && row < rend) { 680 for (j=0; j<n; j++) { 681 col = in[j]; 682 683 /* Look up into the Hash Table */ 684 key = row*Nbs+col+1; 685 h1 = HASH(size,key,tmp); 686 687 idx = h1; 688 #if defined(PETSC_USE_BOPT_g) 689 total_ct++; 690 insert_ct++; 691 if (HT[idx] != key) { 692 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 693 if (idx == size) { 694 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 695 if (idx == h1) { 696 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 697 } 698 } 699 } 700 #else 701 if (HT[idx] != key) { 702 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 703 if (idx == size) { 704 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 705 if (idx == h1) { 706 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 707 } 708 } 709 } 710 #endif 711 baij_a = HD[idx]; 712 if (roworiented) { 713 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 714 /* value = v + (i*(stepval+bs)+j)*bs; */ 715 value = v_t; 716 v_t += bs; 717 if (addv == ADD_VALUES) { 718 for (ii=0; ii<bs; ii++,value+=stepval) { 719 for (jj=ii; jj<bs2; jj+=bs) { 720 baij_a[jj] += *value++; 721 } 722 } 723 } else { 724 for (ii=0; ii<bs; ii++,value+=stepval) { 725 for (jj=ii; jj<bs2; jj+=bs) { 726 baij_a[jj] = *value++; 727 } 728 } 729 } 730 } else { 731 value = v + j*(stepval+bs)*bs + i*bs; 732 if (addv == ADD_VALUES) { 733 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 734 for (jj=0; jj<bs; jj++) { 735 baij_a[jj] += *value++; 736 } 737 } 738 } else { 739 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 740 for (jj=0; jj<bs; jj++) { 741 baij_a[jj] = *value++; 742 } 743 } 744 } 745 } 746 } 747 } else { 748 if (!baij->donotstash) { 749 if (roworiented) { 750 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 751 } else { 752 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 753 } 754 } 755 } 756 } 757 #if defined(PETSC_USE_BOPT_g) 758 baij->ht_total_ct = total_ct; 759 baij->ht_insert_ct = insert_ct; 760 #endif 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatGetValues_MPIBAIJ" 766 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 767 { 768 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 769 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 770 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 771 772 PetscFunctionBegin; 773 for (i=0; i<m; i++) { 774 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 775 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 776 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 777 row = idxm[i] - bsrstart; 778 for (j=0; j<n; j++) { 779 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 780 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 781 if (idxn[j] >= bscstart && idxn[j] < bscend){ 782 col = idxn[j] - bscstart; 783 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 784 } else { 785 if (!baij->colmap) { 786 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 787 } 788 #if defined (PETSC_USE_CTABLE) 789 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 790 data --; 791 #else 792 data = baij->colmap[idxn[j]/bs]-1; 793 #endif 794 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 795 else { 796 col = data + idxn[j]%bs; 797 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 798 } 799 } 800 } 801 } else { 802 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 803 } 804 } 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatNorm_MPIBAIJ" 810 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 811 { 812 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 813 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 814 PetscErrorCode ierr; 815 int i,bs2=baij->bs2; 816 PetscReal sum = 0.0; 817 MatScalar *v; 818 819 PetscFunctionBegin; 820 if (baij->size == 1) { 821 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 822 } else { 823 if (type == NORM_FROBENIUS) { 824 v = amat->a; 825 for (i=0; i<amat->nz*bs2; i++) { 826 #if defined(PETSC_USE_COMPLEX) 827 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 828 #else 829 sum += (*v)*(*v); v++; 830 #endif 831 } 832 v = bmat->a; 833 for (i=0; i<bmat->nz*bs2; i++) { 834 #if defined(PETSC_USE_COMPLEX) 835 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 836 #else 837 sum += (*v)*(*v); v++; 838 #endif 839 } 840 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 841 *nrm = sqrt(*nrm); 842 } else { 843 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 844 } 845 } 846 PetscFunctionReturn(0); 847 } 848 849 850 /* 851 Creates the hash table, and sets the table 852 This table is created only once. 853 If new entried need to be added to the matrix 854 then the hash table has to be destroyed and 855 recreated. 856 */ 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 859 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 860 { 861 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 862 Mat A = baij->A,B=baij->B; 863 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 864 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 865 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 866 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 867 int *HT,key; 868 MatScalar **HD; 869 PetscReal tmp; 870 #if defined(PETSC_USE_BOPT_g) 871 int ct=0,max=0; 872 #endif 873 874 PetscFunctionBegin; 875 baij->ht_size=(int)(factor*nz); 876 size = baij->ht_size; 877 878 if (baij->ht) { 879 PetscFunctionReturn(0); 880 } 881 882 /* Allocate Memory for Hash Table */ 883 ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 884 baij->ht = (int*)(baij->hd + size); 885 HD = baij->hd; 886 HT = baij->ht; 887 888 889 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr); 890 891 892 /* Loop Over A */ 893 for (i=0; i<a->mbs; i++) { 894 for (j=ai[i]; j<ai[i+1]; j++) { 895 row = i+rstart; 896 col = aj[j]+cstart; 897 898 key = row*Nbs + col + 1; 899 h1 = HASH(size,key,tmp); 900 for (k=0; k<size; k++){ 901 if (!HT[(h1+k)%size]) { 902 HT[(h1+k)%size] = key; 903 HD[(h1+k)%size] = a->a + j*bs2; 904 break; 905 #if defined(PETSC_USE_BOPT_g) 906 } else { 907 ct++; 908 #endif 909 } 910 } 911 #if defined(PETSC_USE_BOPT_g) 912 if (k> max) max = k; 913 #endif 914 } 915 } 916 /* Loop Over B */ 917 for (i=0; i<b->mbs; i++) { 918 for (j=bi[i]; j<bi[i+1]; j++) { 919 row = i+rstart; 920 col = garray[bj[j]]; 921 key = row*Nbs + col + 1; 922 h1 = HASH(size,key,tmp); 923 for (k=0; k<size; k++){ 924 if (!HT[(h1+k)%size]) { 925 HT[(h1+k)%size] = key; 926 HD[(h1+k)%size] = b->a + j*bs2; 927 break; 928 #if defined(PETSC_USE_BOPT_g) 929 } else { 930 ct++; 931 #endif 932 } 933 } 934 #if defined(PETSC_USE_BOPT_g) 935 if (k> max) max = k; 936 #endif 937 } 938 } 939 940 /* Print Summary */ 941 #if defined(PETSC_USE_BOPT_g) 942 for (i=0,j=0; i<size; i++) { 943 if (HT[i]) {j++;} 944 } 945 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max); 946 #endif 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 952 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 953 { 954 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 955 PetscErrorCode ierr; 956 int nstash,reallocs; 957 InsertMode addv; 958 959 PetscFunctionBegin; 960 if (baij->donotstash) { 961 PetscFunctionReturn(0); 962 } 963 964 /* make sure all processors are either in INSERTMODE or ADDMODE */ 965 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 966 if (addv == (ADD_VALUES|INSERT_VALUES)) { 967 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 968 } 969 mat->insertmode = addv; /* in case this processor had no cache */ 970 971 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 972 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 973 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 974 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 975 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 976 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 982 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 983 { 984 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 985 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 986 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 987 int *row,*col,other_disassembled; 988 PetscTruth r1,r2,r3; 989 MatScalar *val; 990 InsertMode addv = mat->insertmode; 991 992 PetscFunctionBegin; 993 if (!baij->donotstash) { 994 while (1) { 995 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 996 if (!flg) break; 997 998 for (i=0; i<n;) { 999 /* Now identify the consecutive vals belonging to the same row */ 1000 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1001 if (j < n) ncols = j-i; 1002 else ncols = n-i; 1003 /* Now assemble all these values with a single function call */ 1004 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1005 i = j; 1006 } 1007 } 1008 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1009 /* Now process the block-stash. Since the values are stashed column-oriented, 1010 set the roworiented flag to column oriented, and after MatSetValues() 1011 restore the original flags */ 1012 r1 = baij->roworiented; 1013 r2 = a->roworiented; 1014 r3 = b->roworiented; 1015 baij->roworiented = PETSC_FALSE; 1016 a->roworiented = PETSC_FALSE; 1017 b->roworiented = PETSC_FALSE; 1018 while (1) { 1019 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1020 if (!flg) break; 1021 1022 for (i=0; i<n;) { 1023 /* Now identify the consecutive vals belonging to the same row */ 1024 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1025 if (j < n) ncols = j-i; 1026 else ncols = n-i; 1027 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1028 i = j; 1029 } 1030 } 1031 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1032 baij->roworiented = r1; 1033 a->roworiented = r2; 1034 b->roworiented = r3; 1035 } 1036 1037 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1039 1040 /* determine if any processor has disassembled, if so we must 1041 also disassemble ourselfs, in order that we may reassemble. */ 1042 /* 1043 if nonzero structure of submatrix B cannot change then we know that 1044 no processor disassembled thus we can skip this stuff 1045 */ 1046 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1047 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1048 if (mat->was_assembled && !other_disassembled) { 1049 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1050 } 1051 } 1052 1053 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1054 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1055 } 1056 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1057 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1058 1059 #if defined(PETSC_USE_BOPT_g) 1060 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1061 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1062 baij->ht_total_ct = 0; 1063 baij->ht_insert_ct = 0; 1064 } 1065 #endif 1066 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1067 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1068 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1069 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1070 } 1071 1072 if (baij->rowvalues) { 1073 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1074 baij->rowvalues = 0; 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1081 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1082 { 1083 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1084 PetscErrorCode ierr; 1085 int bs = baij->bs,size = baij->size,rank = baij->rank; 1086 PetscTruth iascii,isdraw; 1087 PetscViewer sviewer; 1088 PetscViewerFormat format; 1089 1090 PetscFunctionBegin; 1091 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1092 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1093 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1094 if (iascii) { 1095 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1096 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1097 MatInfo info; 1098 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1099 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1100 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1101 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1102 baij->bs,(int)info.memory);CHKERRQ(ierr); 1103 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1104 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1105 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1106 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1107 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1108 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1111 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1114 PetscFunctionReturn(0); 1115 } 1116 } 1117 1118 if (isdraw) { 1119 PetscDraw draw; 1120 PetscTruth isnull; 1121 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1122 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1123 } 1124 1125 if (size == 1) { 1126 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1127 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1128 } else { 1129 /* assemble the entire matrix onto first processor. */ 1130 Mat A; 1131 Mat_SeqBAIJ *Aloc; 1132 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1133 MatScalar *a; 1134 1135 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1136 /* Perhaps this should be the type of mat? */ 1137 if (!rank) { 1138 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1139 } else { 1140 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1141 } 1142 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1143 ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1144 PetscLogObjectParent(mat,A); 1145 1146 /* copy over the A part */ 1147 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1148 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1149 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1150 1151 for (i=0; i<mbs; i++) { 1152 rvals[0] = bs*(baij->rstart + i); 1153 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1154 for (j=ai[i]; j<ai[i+1]; j++) { 1155 col = (baij->cstart+aj[j])*bs; 1156 for (k=0; k<bs; k++) { 1157 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1158 col++; a += bs; 1159 } 1160 } 1161 } 1162 /* copy over the B part */ 1163 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1164 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1165 for (i=0; i<mbs; i++) { 1166 rvals[0] = bs*(baij->rstart + i); 1167 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1168 for (j=ai[i]; j<ai[i+1]; j++) { 1169 col = baij->garray[aj[j]]*bs; 1170 for (k=0; k<bs; k++) { 1171 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1172 col++; a += bs; 1173 } 1174 } 1175 } 1176 ierr = PetscFree(rvals);CHKERRQ(ierr); 1177 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1178 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1179 /* 1180 Everyone has to call to draw the matrix since the graphics waits are 1181 synchronized across all processors that share the PetscDraw object 1182 */ 1183 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1184 if (!rank) { 1185 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1186 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1187 } 1188 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1189 ierr = MatDestroy(A);CHKERRQ(ierr); 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatView_MPIBAIJ" 1196 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1197 { 1198 PetscErrorCode ierr; 1199 PetscTruth iascii,isdraw,issocket,isbinary; 1200 1201 PetscFunctionBegin; 1202 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1203 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1204 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1205 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1206 if (iascii || isdraw || issocket || isbinary) { 1207 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1208 } else { 1209 SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1210 } 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNCT__ 1215 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1216 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1217 { 1218 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1219 PetscErrorCode ierr; 1220 1221 PetscFunctionBegin; 1222 #if defined(PETSC_USE_LOG) 1223 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 1224 #endif 1225 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1226 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1227 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1228 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1229 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1230 #if defined (PETSC_USE_CTABLE) 1231 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1232 #else 1233 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1234 #endif 1235 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1236 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1237 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1238 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1239 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1240 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1241 #if defined(PETSC_USE_MAT_SINGLE) 1242 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1243 #endif 1244 ierr = PetscFree(baij);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatMult_MPIBAIJ" 1250 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1251 { 1252 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1253 PetscErrorCode ierr; 1254 int nt; 1255 1256 PetscFunctionBegin; 1257 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1258 if (nt != A->n) { 1259 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1260 } 1261 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1262 if (nt != A->m) { 1263 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1264 } 1265 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1266 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1268 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1269 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1275 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1276 { 1277 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1278 PetscErrorCode ierr; 1279 1280 PetscFunctionBegin; 1281 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1282 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1283 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1284 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1290 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1291 { 1292 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1293 PetscErrorCode ierr; 1294 1295 PetscFunctionBegin; 1296 /* do nondiagonal part */ 1297 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1298 /* send it on its way */ 1299 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1300 /* do local part */ 1301 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1302 /* receive remote parts: note this assumes the values are not actually */ 1303 /* inserted in yy until the next line, which is true for my implementation*/ 1304 /* but is not perhaps always true. */ 1305 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1311 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1312 { 1313 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1314 PetscErrorCode ierr; 1315 1316 PetscFunctionBegin; 1317 /* do nondiagonal part */ 1318 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1319 /* send it on its way */ 1320 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1321 /* do local part */ 1322 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1323 /* receive remote parts: note this assumes the values are not actually */ 1324 /* inserted in yy until the next line, which is true for my implementation*/ 1325 /* but is not perhaps always true. */ 1326 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 /* 1331 This only works correctly for square matrices where the subblock A->A is the 1332 diagonal block 1333 */ 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1336 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1337 { 1338 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1339 PetscErrorCode ierr; 1340 1341 PetscFunctionBegin; 1342 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1343 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNCT__ 1348 #define __FUNCT__ "MatScale_MPIBAIJ" 1349 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1350 { 1351 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1352 PetscErrorCode ierr; 1353 1354 PetscFunctionBegin; 1355 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1356 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1362 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1363 { 1364 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1365 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1366 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1367 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1368 int *cmap,*idx_p,cstart = mat->cstart; 1369 1370 PetscFunctionBegin; 1371 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1372 mat->getrowactive = PETSC_TRUE; 1373 1374 if (!mat->rowvalues && (idx || v)) { 1375 /* 1376 allocate enough space to hold information from the longest row. 1377 */ 1378 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1379 int max = 1,mbs = mat->mbs,tmp; 1380 for (i=0; i<mbs; i++) { 1381 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1382 if (max < tmp) { max = tmp; } 1383 } 1384 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1385 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1386 } 1387 1388 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1389 lrow = row - brstart; 1390 1391 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1392 if (!v) {pvA = 0; pvB = 0;} 1393 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1394 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1395 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1396 nztot = nzA + nzB; 1397 1398 cmap = mat->garray; 1399 if (v || idx) { 1400 if (nztot) { 1401 /* Sort by increasing column numbers, assuming A and B already sorted */ 1402 int imark = -1; 1403 if (v) { 1404 *v = v_p = mat->rowvalues; 1405 for (i=0; i<nzB; i++) { 1406 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1407 else break; 1408 } 1409 imark = i; 1410 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1411 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1412 } 1413 if (idx) { 1414 *idx = idx_p = mat->rowindices; 1415 if (imark > -1) { 1416 for (i=0; i<imark; i++) { 1417 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1418 } 1419 } else { 1420 for (i=0; i<nzB; i++) { 1421 if (cmap[cworkB[i]/bs] < cstart) 1422 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1423 else break; 1424 } 1425 imark = i; 1426 } 1427 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1428 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1429 } 1430 } else { 1431 if (idx) *idx = 0; 1432 if (v) *v = 0; 1433 } 1434 } 1435 *nz = nztot; 1436 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1437 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1443 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1444 { 1445 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1446 1447 PetscFunctionBegin; 1448 if (baij->getrowactive == PETSC_FALSE) { 1449 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1450 } 1451 baij->getrowactive = PETSC_FALSE; 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1457 PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1458 { 1459 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1460 1461 PetscFunctionBegin; 1462 *bs = baij->bs; 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1468 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1469 { 1470 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1475 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1481 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1482 { 1483 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1484 Mat A = a->A,B = a->B; 1485 PetscErrorCode ierr; 1486 PetscReal isend[5],irecv[5]; 1487 1488 PetscFunctionBegin; 1489 info->block_size = (PetscReal)a->bs; 1490 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1491 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1492 isend[3] = info->memory; isend[4] = info->mallocs; 1493 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1494 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1495 isend[3] += info->memory; isend[4] += info->mallocs; 1496 if (flag == MAT_LOCAL) { 1497 info->nz_used = isend[0]; 1498 info->nz_allocated = isend[1]; 1499 info->nz_unneeded = isend[2]; 1500 info->memory = isend[3]; 1501 info->mallocs = isend[4]; 1502 } else if (flag == MAT_GLOBAL_MAX) { 1503 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1504 info->nz_used = irecv[0]; 1505 info->nz_allocated = irecv[1]; 1506 info->nz_unneeded = irecv[2]; 1507 info->memory = irecv[3]; 1508 info->mallocs = irecv[4]; 1509 } else if (flag == MAT_GLOBAL_SUM) { 1510 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1511 info->nz_used = irecv[0]; 1512 info->nz_allocated = irecv[1]; 1513 info->nz_unneeded = irecv[2]; 1514 info->memory = irecv[3]; 1515 info->mallocs = irecv[4]; 1516 } else { 1517 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1518 } 1519 info->rows_global = (PetscReal)A->M; 1520 info->columns_global = (PetscReal)A->N; 1521 info->rows_local = (PetscReal)A->m; 1522 info->columns_local = (PetscReal)A->N; 1523 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1524 info->fill_ratio_needed = 0; 1525 info->factor_mallocs = 0; 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1531 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1532 { 1533 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 switch (op) { 1538 case MAT_NO_NEW_NONZERO_LOCATIONS: 1539 case MAT_YES_NEW_NONZERO_LOCATIONS: 1540 case MAT_COLUMNS_UNSORTED: 1541 case MAT_COLUMNS_SORTED: 1542 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1543 case MAT_KEEP_ZEROED_ROWS: 1544 case MAT_NEW_NONZERO_LOCATION_ERR: 1545 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1546 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1547 break; 1548 case MAT_ROW_ORIENTED: 1549 a->roworiented = PETSC_TRUE; 1550 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1551 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1552 break; 1553 case MAT_ROWS_SORTED: 1554 case MAT_ROWS_UNSORTED: 1555 case MAT_YES_NEW_DIAGONALS: 1556 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1557 break; 1558 case MAT_COLUMN_ORIENTED: 1559 a->roworiented = PETSC_FALSE; 1560 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1561 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1562 break; 1563 case MAT_IGNORE_OFF_PROC_ENTRIES: 1564 a->donotstash = PETSC_TRUE; 1565 break; 1566 case MAT_NO_NEW_DIAGONALS: 1567 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1568 case MAT_USE_HASH_TABLE: 1569 a->ht_flag = PETSC_TRUE; 1570 break; 1571 case MAT_SYMMETRIC: 1572 case MAT_STRUCTURALLY_SYMMETRIC: 1573 case MAT_HERMITIAN: 1574 case MAT_SYMMETRY_ETERNAL: 1575 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1576 break; 1577 case MAT_NOT_SYMMETRIC: 1578 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1579 case MAT_NOT_HERMITIAN: 1580 case MAT_NOT_SYMMETRY_ETERNAL: 1581 break; 1582 default: 1583 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1590 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1591 { 1592 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1593 Mat_SeqBAIJ *Aloc; 1594 Mat B; 1595 PetscErrorCode ierr; 1596 int M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1597 int bs=baij->bs,mbs=baij->mbs; 1598 MatScalar *a; 1599 1600 PetscFunctionBegin; 1601 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1602 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1603 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1604 ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1605 1606 /* copy over the A part */ 1607 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1608 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1609 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1610 1611 for (i=0; i<mbs; i++) { 1612 rvals[0] = bs*(baij->rstart + i); 1613 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1614 for (j=ai[i]; j<ai[i+1]; j++) { 1615 col = (baij->cstart+aj[j])*bs; 1616 for (k=0; k<bs; k++) { 1617 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1618 col++; a += bs; 1619 } 1620 } 1621 } 1622 /* copy over the B part */ 1623 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1624 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1625 for (i=0; i<mbs; i++) { 1626 rvals[0] = bs*(baij->rstart + i); 1627 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1628 for (j=ai[i]; j<ai[i+1]; j++) { 1629 col = baij->garray[aj[j]]*bs; 1630 for (k=0; k<bs; k++) { 1631 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1632 col++; a += bs; 1633 } 1634 } 1635 } 1636 ierr = PetscFree(rvals);CHKERRQ(ierr); 1637 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1638 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1639 1640 if (matout) { 1641 *matout = B; 1642 } else { 1643 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1644 } 1645 PetscFunctionReturn(0); 1646 } 1647 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1650 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1651 { 1652 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1653 Mat a = baij->A,b = baij->B; 1654 PetscErrorCode ierr; 1655 int s1,s2,s3; 1656 1657 PetscFunctionBegin; 1658 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1659 if (rr) { 1660 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1661 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1662 /* Overlap communication with computation. */ 1663 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1664 } 1665 if (ll) { 1666 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1667 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1668 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1669 } 1670 /* scale the diagonal block */ 1671 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1672 1673 if (rr) { 1674 /* Do a scatter end and then right scale the off-diagonal block */ 1675 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1676 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1677 } 1678 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1684 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1685 { 1686 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1687 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1688 int *nprocs,j,idx,nsends,row; 1689 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1690 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1691 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1692 MPI_Comm comm = A->comm; 1693 MPI_Request *send_waits,*recv_waits; 1694 MPI_Status recv_status,*send_status; 1695 IS istmp; 1696 PetscTruth found; 1697 1698 PetscFunctionBegin; 1699 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1700 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1701 1702 /* first count number of contributors to each processor */ 1703 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1704 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1705 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1706 for (i=0; i<N; i++) { 1707 idx = rows[i]; 1708 found = PETSC_FALSE; 1709 for (j=0; j<size; j++) { 1710 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1711 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1712 } 1713 } 1714 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1715 } 1716 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1717 1718 /* inform other processors of number of messages and max length*/ 1719 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1720 1721 /* post receives: */ 1722 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1723 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1724 for (i=0; i<nrecvs; i++) { 1725 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1726 } 1727 1728 /* do sends: 1729 1) starts[i] gives the starting index in svalues for stuff going to 1730 the ith processor 1731 */ 1732 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1733 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1734 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1735 starts[0] = 0; 1736 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1737 for (i=0; i<N; i++) { 1738 svalues[starts[owner[i]]++] = rows[i]; 1739 } 1740 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1741 1742 starts[0] = 0; 1743 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1744 count = 0; 1745 for (i=0; i<size; i++) { 1746 if (nprocs[2*i+1]) { 1747 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1748 } 1749 } 1750 ierr = PetscFree(starts);CHKERRQ(ierr); 1751 1752 base = owners[rank]*bs; 1753 1754 /* wait on receives */ 1755 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1756 source = lens + nrecvs; 1757 count = nrecvs; slen = 0; 1758 while (count) { 1759 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1760 /* unpack receives into our local space */ 1761 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1762 source[imdex] = recv_status.MPI_SOURCE; 1763 lens[imdex] = n; 1764 slen += n; 1765 count--; 1766 } 1767 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1768 1769 /* move the data into the send scatter */ 1770 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1771 count = 0; 1772 for (i=0; i<nrecvs; i++) { 1773 values = rvalues + i*nmax; 1774 for (j=0; j<lens[i]; j++) { 1775 lrows[count++] = values[j] - base; 1776 } 1777 } 1778 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1779 ierr = PetscFree(lens);CHKERRQ(ierr); 1780 ierr = PetscFree(owner);CHKERRQ(ierr); 1781 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1782 1783 /* actually zap the local rows */ 1784 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1785 PetscLogObjectParent(A,istmp); 1786 1787 /* 1788 Zero the required rows. If the "diagonal block" of the matrix 1789 is square and the user wishes to set the diagonal we use seperate 1790 code so that MatSetValues() is not called for each diagonal allocating 1791 new memory, thus calling lots of mallocs and slowing things down. 1792 1793 Contributed by: Mathew Knepley 1794 */ 1795 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1796 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1797 if (diag && (l->A->M == l->A->N)) { 1798 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1799 } else if (diag) { 1800 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1801 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1802 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1803 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1804 } 1805 for (i=0; i<slen; i++) { 1806 row = lrows[i] + rstart_bs; 1807 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1808 } 1809 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1810 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1811 } else { 1812 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1813 } 1814 1815 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1816 ierr = PetscFree(lrows);CHKERRQ(ierr); 1817 1818 /* wait on sends */ 1819 if (nsends) { 1820 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1821 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1822 ierr = PetscFree(send_status);CHKERRQ(ierr); 1823 } 1824 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1825 ierr = PetscFree(svalues);CHKERRQ(ierr); 1826 1827 PetscFunctionReturn(0); 1828 } 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1832 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1833 { 1834 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1835 MPI_Comm comm = A->comm; 1836 static int called = 0; 1837 PetscErrorCode ierr; 1838 1839 PetscFunctionBegin; 1840 if (!a->rank) { 1841 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1842 } 1843 if (called) {PetscFunctionReturn(0);} else called = 1; 1844 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1845 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1851 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1852 { 1853 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1854 PetscErrorCode ierr; 1855 1856 PetscFunctionBegin; 1857 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatEqual_MPIBAIJ" 1865 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1866 { 1867 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1868 Mat a,b,c,d; 1869 PetscTruth flg; 1870 PetscErrorCode ierr; 1871 1872 PetscFunctionBegin; 1873 a = matA->A; b = matA->B; 1874 c = matB->A; d = matB->B; 1875 1876 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1877 if (flg == PETSC_TRUE) { 1878 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1879 } 1880 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1881 PetscFunctionReturn(0); 1882 } 1883 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1887 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1888 { 1889 PetscErrorCode ierr; 1890 1891 PetscFunctionBegin; 1892 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1893 PetscFunctionReturn(0); 1894 } 1895 1896 /* -------------------------------------------------------------------*/ 1897 static struct _MatOps MatOps_Values = { 1898 MatSetValues_MPIBAIJ, 1899 MatGetRow_MPIBAIJ, 1900 MatRestoreRow_MPIBAIJ, 1901 MatMult_MPIBAIJ, 1902 /* 4*/ MatMultAdd_MPIBAIJ, 1903 MatMultTranspose_MPIBAIJ, 1904 MatMultTransposeAdd_MPIBAIJ, 1905 0, 1906 0, 1907 0, 1908 /*10*/ 0, 1909 0, 1910 0, 1911 0, 1912 MatTranspose_MPIBAIJ, 1913 /*15*/ MatGetInfo_MPIBAIJ, 1914 MatEqual_MPIBAIJ, 1915 MatGetDiagonal_MPIBAIJ, 1916 MatDiagonalScale_MPIBAIJ, 1917 MatNorm_MPIBAIJ, 1918 /*20*/ MatAssemblyBegin_MPIBAIJ, 1919 MatAssemblyEnd_MPIBAIJ, 1920 0, 1921 MatSetOption_MPIBAIJ, 1922 MatZeroEntries_MPIBAIJ, 1923 /*25*/ MatZeroRows_MPIBAIJ, 1924 0, 1925 0, 1926 0, 1927 0, 1928 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1929 0, 1930 0, 1931 0, 1932 0, 1933 /*35*/ MatDuplicate_MPIBAIJ, 1934 0, 1935 0, 1936 0, 1937 0, 1938 /*40*/ 0, 1939 MatGetSubMatrices_MPIBAIJ, 1940 MatIncreaseOverlap_MPIBAIJ, 1941 MatGetValues_MPIBAIJ, 1942 0, 1943 /*45*/ MatPrintHelp_MPIBAIJ, 1944 MatScale_MPIBAIJ, 1945 0, 1946 0, 1947 0, 1948 /*50*/ MatGetBlockSize_MPIBAIJ, 1949 0, 1950 0, 1951 0, 1952 0, 1953 /*55*/ 0, 1954 0, 1955 MatSetUnfactored_MPIBAIJ, 1956 0, 1957 MatSetValuesBlocked_MPIBAIJ, 1958 /*60*/ 0, 1959 MatDestroy_MPIBAIJ, 1960 MatView_MPIBAIJ, 1961 MatGetPetscMaps_Petsc, 1962 0, 1963 /*65*/ 0, 1964 0, 1965 0, 1966 0, 1967 0, 1968 /*70*/ MatGetRowMax_MPIBAIJ, 1969 0, 1970 0, 1971 0, 1972 0, 1973 /*75*/ 0, 1974 0, 1975 0, 1976 0, 1977 0, 1978 /*80*/ 0, 1979 0, 1980 0, 1981 0, 1982 /*85*/ MatLoad_MPIBAIJ 1983 }; 1984 1985 1986 EXTERN_C_BEGIN 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1989 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1990 { 1991 PetscFunctionBegin; 1992 *a = ((Mat_MPIBAIJ *)A->data)->A; 1993 *iscopy = PETSC_FALSE; 1994 PetscFunctionReturn(0); 1995 } 1996 EXTERN_C_END 1997 1998 EXTERN_C_BEGIN 1999 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 2000 EXTERN_C_END 2001 2002 EXTERN_C_BEGIN 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2005 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2006 { 2007 Mat_MPIBAIJ *b; 2008 PetscErrorCode ierr; 2009 int i; 2010 2011 PetscFunctionBegin; 2012 B->preallocated = PETSC_TRUE; 2013 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2014 2015 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2016 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2017 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2018 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2019 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2020 if (d_nnz) { 2021 for (i=0; i<B->m/bs; i++) { 2022 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2023 } 2024 } 2025 if (o_nnz) { 2026 for (i=0; i<B->m/bs; i++) { 2027 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2028 } 2029 } 2030 2031 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2032 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2033 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2034 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2035 2036 b = (Mat_MPIBAIJ*)B->data; 2037 b->bs = bs; 2038 b->bs2 = bs*bs; 2039 b->mbs = B->m/bs; 2040 b->nbs = B->n/bs; 2041 b->Mbs = B->M/bs; 2042 b->Nbs = B->N/bs; 2043 2044 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2045 b->rowners[0] = 0; 2046 for (i=2; i<=b->size; i++) { 2047 b->rowners[i] += b->rowners[i-1]; 2048 } 2049 b->rstart = b->rowners[b->rank]; 2050 b->rend = b->rowners[b->rank+1]; 2051 2052 ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2053 b->cowners[0] = 0; 2054 for (i=2; i<=b->size; i++) { 2055 b->cowners[i] += b->cowners[i-1]; 2056 } 2057 b->cstart = b->cowners[b->rank]; 2058 b->cend = b->cowners[b->rank+1]; 2059 2060 for (i=0; i<=b->size; i++) { 2061 b->rowners_bs[i] = b->rowners[i]*bs; 2062 } 2063 b->rstart_bs = b->rstart*bs; 2064 b->rend_bs = b->rend*bs; 2065 b->cstart_bs = b->cstart*bs; 2066 b->cend_bs = b->cend*bs; 2067 2068 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2069 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2070 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2071 PetscLogObjectParent(B,b->A); 2072 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2073 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2074 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2075 PetscLogObjectParent(B,b->B); 2076 2077 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2078 2079 PetscFunctionReturn(0); 2080 } 2081 EXTERN_C_END 2082 2083 EXTERN_C_BEGIN 2084 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2085 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2086 EXTERN_C_END 2087 2088 /*MC 2089 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2090 2091 Options Database Keys: 2092 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2093 2094 Level: beginner 2095 2096 .seealso: MatCreateMPIBAIJ 2097 M*/ 2098 2099 EXTERN_C_BEGIN 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "MatCreate_MPIBAIJ" 2102 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2103 { 2104 Mat_MPIBAIJ *b; 2105 PetscErrorCode ierr; 2106 PetscTruth flg; 2107 2108 PetscFunctionBegin; 2109 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2110 B->data = (void*)b; 2111 2112 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2113 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2114 B->mapping = 0; 2115 B->factor = 0; 2116 B->assembled = PETSC_FALSE; 2117 2118 B->insertmode = NOT_SET_VALUES; 2119 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2120 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2121 2122 /* build local table of row and column ownerships */ 2123 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2124 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2125 b->cowners = b->rowners + b->size + 2; 2126 b->rowners_bs = b->cowners + b->size + 2; 2127 2128 /* build cache for off array entries formed */ 2129 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2130 b->donotstash = PETSC_FALSE; 2131 b->colmap = PETSC_NULL; 2132 b->garray = PETSC_NULL; 2133 b->roworiented = PETSC_TRUE; 2134 2135 #if defined(PETSC_USE_MAT_SINGLE) 2136 /* stuff for MatSetValues_XXX in single precision */ 2137 b->setvalueslen = 0; 2138 b->setvaluescopy = PETSC_NULL; 2139 #endif 2140 2141 /* stuff used in block assembly */ 2142 b->barray = 0; 2143 2144 /* stuff used for matrix vector multiply */ 2145 b->lvec = 0; 2146 b->Mvctx = 0; 2147 2148 /* stuff for MatGetRow() */ 2149 b->rowindices = 0; 2150 b->rowvalues = 0; 2151 b->getrowactive = PETSC_FALSE; 2152 2153 /* hash table stuff */ 2154 b->ht = 0; 2155 b->hd = 0; 2156 b->ht_size = 0; 2157 b->ht_flag = PETSC_FALSE; 2158 b->ht_fact = 0; 2159 b->ht_total_ct = 0; 2160 b->ht_insert_ct = 0; 2161 2162 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2163 if (flg) { 2164 PetscReal fact = 1.39; 2165 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2166 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2167 if (fact <= 1.0) fact = 1.39; 2168 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2169 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2170 } 2171 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2172 "MatStoreValues_MPIBAIJ", 2173 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2174 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2175 "MatRetrieveValues_MPIBAIJ", 2176 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2177 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2178 "MatGetDiagonalBlock_MPIBAIJ", 2179 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2180 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2181 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2182 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2183 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2184 "MatDiagonalScaleLocal_MPIBAIJ", 2185 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2186 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2187 "MatSetHashTableFactor_MPIBAIJ", 2188 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2189 PetscFunctionReturn(0); 2190 } 2191 EXTERN_C_END 2192 2193 /*MC 2194 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2195 2196 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2197 and MATMPIBAIJ otherwise. 2198 2199 Options Database Keys: 2200 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2201 2202 Level: beginner 2203 2204 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ 2205 M*/ 2206 2207 EXTERN_C_BEGIN 2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatCreate_BAIJ" 2210 PetscErrorCode MatCreate_BAIJ(Mat A) 2211 { 2212 PetscErrorCode ierr,size; 2213 2214 PetscFunctionBegin; 2215 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2216 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2217 if (size == 1) { 2218 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2219 } else { 2220 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2221 } 2222 PetscFunctionReturn(0); 2223 } 2224 EXTERN_C_END 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2228 /*@C 2229 MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format 2230 (block compressed row). For good matrix assembly performance 2231 the user should preallocate the matrix storage by setting the parameters 2232 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2233 performance can be increased by more than a factor of 50. 2234 2235 Collective on Mat 2236 2237 Input Parameters: 2238 + A - the matrix 2239 . bs - size of blockk 2240 . d_nz - number of block nonzeros per block row in diagonal portion of local 2241 submatrix (same for all local rows) 2242 . d_nnz - array containing the number of block nonzeros in the various block rows 2243 of the in diagonal portion of the local (possibly different for each block 2244 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2245 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2246 submatrix (same for all local rows). 2247 - o_nnz - array containing the number of nonzeros in the various block rows of the 2248 off-diagonal portion of the local submatrix (possibly different for 2249 each block row) or PETSC_NULL. 2250 2251 Output Parameter: 2252 2253 2254 Options Database Keys: 2255 . -mat_no_unroll - uses code that does not unroll the loops in the 2256 block calculations (much slower) 2257 . -mat_block_size - size of the blocks to use 2258 2259 Notes: 2260 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2261 than it must be used on all processors that share the object for that argument. 2262 2263 Storage Information: 2264 For a square global matrix we define each processor's diagonal portion 2265 to be its local rows and the corresponding columns (a square submatrix); 2266 each processor's off-diagonal portion encompasses the remainder of the 2267 local matrix (a rectangular submatrix). 2268 2269 The user can specify preallocated storage for the diagonal part of 2270 the local submatrix with either d_nz or d_nnz (not both). Set 2271 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2272 memory allocation. Likewise, specify preallocated storage for the 2273 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2274 2275 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2276 the figure below we depict these three local rows and all columns (0-11). 2277 2278 .vb 2279 0 1 2 3 4 5 6 7 8 9 10 11 2280 ------------------- 2281 row 3 | o o o d d d o o o o o o 2282 row 4 | o o o d d d o o o o o o 2283 row 5 | o o o d d d o o o o o o 2284 ------------------- 2285 .ve 2286 2287 Thus, any entries in the d locations are stored in the d (diagonal) 2288 submatrix, and any entries in the o locations are stored in the 2289 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2290 stored simply in the MATSEQBAIJ format for compressed row storage. 2291 2292 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2293 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2294 In general, for PDE problems in which most nonzeros are near the diagonal, 2295 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2296 or you will get TERRIBLE performance; see the users' manual chapter on 2297 matrices. 2298 2299 Level: intermediate 2300 2301 .keywords: matrix, block, aij, compressed row, sparse, parallel 2302 2303 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2304 @*/ 2305 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2306 { 2307 PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]); 2308 2309 PetscFunctionBegin; 2310 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2311 if (f) { 2312 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2313 } 2314 PetscFunctionReturn(0); 2315 } 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatCreateMPIBAIJ" 2319 /*@C 2320 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2321 (block compressed row). For good matrix assembly performance 2322 the user should preallocate the matrix storage by setting the parameters 2323 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2324 performance can be increased by more than a factor of 50. 2325 2326 Collective on MPI_Comm 2327 2328 Input Parameters: 2329 + comm - MPI communicator 2330 . bs - size of blockk 2331 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2332 This value should be the same as the local size used in creating the 2333 y vector for the matrix-vector product y = Ax. 2334 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2335 This value should be the same as the local size used in creating the 2336 x vector for the matrix-vector product y = Ax. 2337 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2338 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2339 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2340 submatrix (same for all local rows) 2341 . d_nnz - array containing the number of nonzero blocks in the various block rows 2342 of the in diagonal portion of the local (possibly different for each block 2343 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2344 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2345 submatrix (same for all local rows). 2346 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2347 off-diagonal portion of the local submatrix (possibly different for 2348 each block row) or PETSC_NULL. 2349 2350 Output Parameter: 2351 . A - the matrix 2352 2353 Options Database Keys: 2354 . -mat_no_unroll - uses code that does not unroll the loops in the 2355 block calculations (much slower) 2356 . -mat_block_size - size of the blocks to use 2357 2358 Notes: 2359 A nonzero block is any block that as 1 or more nonzeros in it 2360 2361 The user MUST specify either the local or global matrix dimensions 2362 (possibly both). 2363 2364 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2365 than it must be used on all processors that share the object for that argument. 2366 2367 Storage Information: 2368 For a square global matrix we define each processor's diagonal portion 2369 to be its local rows and the corresponding columns (a square submatrix); 2370 each processor's off-diagonal portion encompasses the remainder of the 2371 local matrix (a rectangular submatrix). 2372 2373 The user can specify preallocated storage for the diagonal part of 2374 the local submatrix with either d_nz or d_nnz (not both). Set 2375 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2376 memory allocation. Likewise, specify preallocated storage for the 2377 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2378 2379 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2380 the figure below we depict these three local rows and all columns (0-11). 2381 2382 .vb 2383 0 1 2 3 4 5 6 7 8 9 10 11 2384 ------------------- 2385 row 3 | o o o d d d o o o o o o 2386 row 4 | o o o d d d o o o o o o 2387 row 5 | o o o d d d o o o o o o 2388 ------------------- 2389 .ve 2390 2391 Thus, any entries in the d locations are stored in the d (diagonal) 2392 submatrix, and any entries in the o locations are stored in the 2393 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2394 stored simply in the MATSEQBAIJ format for compressed row storage. 2395 2396 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2397 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2398 In general, for PDE problems in which most nonzeros are near the diagonal, 2399 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2400 or you will get TERRIBLE performance; see the users' manual chapter on 2401 matrices. 2402 2403 Level: intermediate 2404 2405 .keywords: matrix, block, aij, compressed row, sparse, parallel 2406 2407 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2408 @*/ 2409 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2410 { 2411 PetscErrorCode ierr,size; 2412 2413 PetscFunctionBegin; 2414 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2415 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2416 if (size > 1) { 2417 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2418 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2419 } else { 2420 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2421 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2422 } 2423 PetscFunctionReturn(0); 2424 } 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2428 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2429 { 2430 Mat mat; 2431 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2432 PetscErrorCode ierr; 2433 int len=0; 2434 2435 PetscFunctionBegin; 2436 *newmat = 0; 2437 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2438 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2439 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2440 2441 mat->factor = matin->factor; 2442 mat->preallocated = PETSC_TRUE; 2443 mat->assembled = PETSC_TRUE; 2444 mat->insertmode = NOT_SET_VALUES; 2445 2446 a = (Mat_MPIBAIJ*)mat->data; 2447 a->bs = oldmat->bs; 2448 a->bs2 = oldmat->bs2; 2449 a->mbs = oldmat->mbs; 2450 a->nbs = oldmat->nbs; 2451 a->Mbs = oldmat->Mbs; 2452 a->Nbs = oldmat->Nbs; 2453 2454 a->rstart = oldmat->rstart; 2455 a->rend = oldmat->rend; 2456 a->cstart = oldmat->cstart; 2457 a->cend = oldmat->cend; 2458 a->size = oldmat->size; 2459 a->rank = oldmat->rank; 2460 a->donotstash = oldmat->donotstash; 2461 a->roworiented = oldmat->roworiented; 2462 a->rowindices = 0; 2463 a->rowvalues = 0; 2464 a->getrowactive = PETSC_FALSE; 2465 a->barray = 0; 2466 a->rstart_bs = oldmat->rstart_bs; 2467 a->rend_bs = oldmat->rend_bs; 2468 a->cstart_bs = oldmat->cstart_bs; 2469 a->cend_bs = oldmat->cend_bs; 2470 2471 /* hash table stuff */ 2472 a->ht = 0; 2473 a->hd = 0; 2474 a->ht_size = 0; 2475 a->ht_flag = oldmat->ht_flag; 2476 a->ht_fact = oldmat->ht_fact; 2477 a->ht_total_ct = 0; 2478 a->ht_insert_ct = 0; 2479 2480 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2481 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2482 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2483 if (oldmat->colmap) { 2484 #if defined (PETSC_USE_CTABLE) 2485 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2486 #else 2487 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2488 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2489 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2490 #endif 2491 } else a->colmap = 0; 2492 2493 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2494 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2495 PetscLogObjectMemory(mat,len*sizeof(int)); 2496 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2497 } else a->garray = 0; 2498 2499 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2500 PetscLogObjectParent(mat,a->lvec); 2501 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2502 PetscLogObjectParent(mat,a->Mvctx); 2503 2504 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2505 PetscLogObjectParent(mat,a->A); 2506 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2507 PetscLogObjectParent(mat,a->B); 2508 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2509 *newmat = mat; 2510 2511 PetscFunctionReturn(0); 2512 } 2513 2514 #include "petscsys.h" 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatLoad_MPIBAIJ" 2518 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2519 { 2520 Mat A; 2521 int i,nz,ierr,j,rstart,rend,fd; 2522 PetscScalar *vals,*buf; 2523 MPI_Comm comm = ((PetscObject)viewer)->comm; 2524 MPI_Status status; 2525 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2526 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2527 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2528 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2529 int dcount,kmax,k,nzcount,tmp; 2530 2531 PetscFunctionBegin; 2532 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2533 2534 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2535 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2536 if (!rank) { 2537 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2538 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2539 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2540 if (header[3] < 0) { 2541 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2542 } 2543 } 2544 2545 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2546 M = header[1]; N = header[2]; 2547 2548 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2549 2550 /* 2551 This code adds extra rows to make sure the number of rows is 2552 divisible by the blocksize 2553 */ 2554 Mbs = M/bs; 2555 extra_rows = bs - M + bs*(Mbs); 2556 if (extra_rows == bs) extra_rows = 0; 2557 else Mbs++; 2558 if (extra_rows &&!rank) { 2559 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2560 } 2561 2562 /* determine ownership of all rows */ 2563 mbs = Mbs/size + ((Mbs % size) > rank); 2564 m = mbs*bs; 2565 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2566 browners = rowners + size + 1; 2567 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2568 rowners[0] = 0; 2569 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2570 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2571 rstart = rowners[rank]; 2572 rend = rowners[rank+1]; 2573 2574 /* distribute row lengths to all processors */ 2575 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2576 if (!rank) { 2577 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2578 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2579 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2580 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2581 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2582 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2583 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2584 } else { 2585 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2586 } 2587 2588 if (!rank) { 2589 /* calculate the number of nonzeros on each processor */ 2590 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2591 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2592 for (i=0; i<size; i++) { 2593 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2594 procsnz[i] += rowlengths[j]; 2595 } 2596 } 2597 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2598 2599 /* determine max buffer needed and allocate it */ 2600 maxnz = 0; 2601 for (i=0; i<size; i++) { 2602 maxnz = PetscMax(maxnz,procsnz[i]); 2603 } 2604 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2605 2606 /* read in my part of the matrix column indices */ 2607 nz = procsnz[0]; 2608 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2609 mycols = ibuf; 2610 if (size == 1) nz -= extra_rows; 2611 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2612 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2613 2614 /* read in every ones (except the last) and ship off */ 2615 for (i=1; i<size-1; i++) { 2616 nz = procsnz[i]; 2617 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2618 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2619 } 2620 /* read in the stuff for the last proc */ 2621 if (size != 1) { 2622 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2623 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2624 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2625 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2626 } 2627 ierr = PetscFree(cols);CHKERRQ(ierr); 2628 } else { 2629 /* determine buffer space needed for message */ 2630 nz = 0; 2631 for (i=0; i<m; i++) { 2632 nz += locrowlens[i]; 2633 } 2634 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2635 mycols = ibuf; 2636 /* receive message of column indices*/ 2637 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2638 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2639 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2640 } 2641 2642 /* loop over local rows, determining number of off diagonal entries */ 2643 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2644 odlens = dlens + (rend-rstart); 2645 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2646 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2647 masked1 = mask + Mbs; 2648 masked2 = masked1 + Mbs; 2649 rowcount = 0; nzcount = 0; 2650 for (i=0; i<mbs; i++) { 2651 dcount = 0; 2652 odcount = 0; 2653 for (j=0; j<bs; j++) { 2654 kmax = locrowlens[rowcount]; 2655 for (k=0; k<kmax; k++) { 2656 tmp = mycols[nzcount++]/bs; 2657 if (!mask[tmp]) { 2658 mask[tmp] = 1; 2659 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2660 else masked1[dcount++] = tmp; 2661 } 2662 } 2663 rowcount++; 2664 } 2665 2666 dlens[i] = dcount; 2667 odlens[i] = odcount; 2668 2669 /* zero out the mask elements we set */ 2670 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2671 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2672 } 2673 2674 /* create our matrix */ 2675 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2676 ierr = MatSetType(A,type);CHKERRQ(ierr) 2677 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2678 2679 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2680 MatSetOption(A,MAT_COLUMNS_SORTED); 2681 2682 if (!rank) { 2683 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2684 /* read in my part of the matrix numerical values */ 2685 nz = procsnz[0]; 2686 vals = buf; 2687 mycols = ibuf; 2688 if (size == 1) nz -= extra_rows; 2689 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2690 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2691 2692 /* insert into matrix */ 2693 jj = rstart*bs; 2694 for (i=0; i<m; i++) { 2695 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2696 mycols += locrowlens[i]; 2697 vals += locrowlens[i]; 2698 jj++; 2699 } 2700 /* read in other processors (except the last one) and ship out */ 2701 for (i=1; i<size-1; i++) { 2702 nz = procsnz[i]; 2703 vals = buf; 2704 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2705 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2706 } 2707 /* the last proc */ 2708 if (size != 1){ 2709 nz = procsnz[i] - extra_rows; 2710 vals = buf; 2711 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2712 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2713 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2714 } 2715 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2716 } else { 2717 /* receive numeric values */ 2718 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2719 2720 /* receive message of values*/ 2721 vals = buf; 2722 mycols = ibuf; 2723 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2724 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2725 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2726 2727 /* insert into matrix */ 2728 jj = rstart*bs; 2729 for (i=0; i<m; i++) { 2730 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2731 mycols += locrowlens[i]; 2732 vals += locrowlens[i]; 2733 jj++; 2734 } 2735 } 2736 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2737 ierr = PetscFree(buf);CHKERRQ(ierr); 2738 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2739 ierr = PetscFree(rowners);CHKERRQ(ierr); 2740 ierr = PetscFree(dlens);CHKERRQ(ierr); 2741 ierr = PetscFree(mask);CHKERRQ(ierr); 2742 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2743 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2744 2745 *newmat = A; 2746 PetscFunctionReturn(0); 2747 } 2748 2749 #undef __FUNCT__ 2750 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2751 /*@ 2752 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2753 2754 Input Parameters: 2755 . mat - the matrix 2756 . fact - factor 2757 2758 Collective on Mat 2759 2760 Level: advanced 2761 2762 Notes: 2763 This can also be set by the command line option: -mat_use_hash_table fact 2764 2765 .keywords: matrix, hashtable, factor, HT 2766 2767 .seealso: MatSetOption() 2768 @*/ 2769 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2770 { 2771 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2772 2773 PetscFunctionBegin; 2774 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2775 if (f) { 2776 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2777 } 2778 PetscFunctionReturn(0); 2779 } 2780 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2783 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2784 { 2785 Mat_MPIBAIJ *baij; 2786 2787 PetscFunctionBegin; 2788 baij = (Mat_MPIBAIJ*)mat->data; 2789 baij->ht_fact = fact; 2790 PetscFunctionReturn(0); 2791 } 2792 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2795 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2796 { 2797 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2798 PetscFunctionBegin; 2799 *Ad = a->A; 2800 *Ao = a->B; 2801 *colmap = a->garray; 2802 PetscFunctionReturn(0); 2803 } 2804