1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN int DisAssemble_MPIBAIJ(Mat); 7 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int); 8 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []); 10 EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode); 11 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 12 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 13 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 14 EXTERN int MatPrintHelp_SeqBAIJ(Mat); 15 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 16 17 /* UGLY, ugly, ugly 18 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 19 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 20 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 21 converts the entries into single precision and then calls ..._MatScalar() to put them 22 into the single precision data structures. 23 */ 24 #if defined(PETSC_USE_MAT_SINGLE) 25 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 26 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 27 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 28 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 29 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 30 #else 31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 32 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 34 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 36 #endif 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 40 int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 41 { 42 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 43 int ierr,i; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 52 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 53 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 55 56 for (i=0; i<A->m; i++){ 57 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 58 } 59 60 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 61 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 63 64 PetscFunctionReturn(0); 65 } 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70 int MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 int ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85 int MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 int ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 int CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 int nbs = B->nbs,i,bs=B->bs,ierr; 110 111 PetscFunctionBegin; 112 #if defined (PETSC_USE_CTABLE) 113 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 114 for (i=0; i<nbs; i++){ 115 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 116 } 117 #else 118 ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 119 PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 120 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 121 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 122 #endif 123 PetscFunctionReturn(0); 124 } 125 126 #define CHUNKSIZE 10 127 128 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 129 { \ 130 \ 131 brow = row/bs; \ 132 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 133 rmax = aimax[brow]; nrow = ailen[brow]; \ 134 bcol = col/bs; \ 135 ridx = row % bs; cidx = col % bs; \ 136 low = 0; high = nrow; \ 137 while (high-low > 3) { \ 138 t = (low+high)/2; \ 139 if (rp[t] > bcol) high = t; \ 140 else low = t; \ 141 } \ 142 for (_i=low; _i<high; _i++) { \ 143 if (rp[_i] > bcol) break; \ 144 if (rp[_i] == bcol) { \ 145 bap = ap + bs2*_i + bs*cidx + ridx; \ 146 if (addv == ADD_VALUES) *bap += value; \ 147 else *bap = value; \ 148 goto a_noinsert; \ 149 } \ 150 } \ 151 if (a->nonew == 1) goto a_noinsert; \ 152 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 153 if (nrow >= rmax) { \ 154 /* there is no extra room in row, therefore enlarge */ \ 155 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 156 MatScalar *new_a; \ 157 \ 158 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 159 \ 160 /* malloc new storage space */ \ 161 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 162 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 163 new_j = (int*)(new_a + bs2*new_nz); \ 164 new_i = new_j + new_nz; \ 165 \ 166 /* copy over old data into new slots */ \ 167 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 168 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 169 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 170 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 171 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 172 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 173 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 175 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 176 /* free up old matrix storage */ \ 177 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 178 if (!a->singlemalloc) { \ 179 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 180 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 181 } \ 182 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 183 a->singlemalloc = PETSC_TRUE; \ 184 \ 185 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 186 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 187 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 188 a->maxnz += bs2*CHUNKSIZE; \ 189 a->reallocs++; \ 190 a->nz++; \ 191 } \ 192 N = nrow++ - 1; \ 193 /* shift up all the later entries in this row */ \ 194 for (ii=N; ii>=_i; ii--) { \ 195 rp[ii+1] = rp[ii]; \ 196 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 197 } \ 198 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 199 rp[_i] = bcol; \ 200 ap[bs2*_i + bs*cidx + ridx] = value; \ 201 a_noinsert:; \ 202 ailen[brow] = nrow; \ 203 } 204 205 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 206 { \ 207 brow = row/bs; \ 208 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 209 rmax = bimax[brow]; nrow = bilen[brow]; \ 210 bcol = col/bs; \ 211 ridx = row % bs; cidx = col % bs; \ 212 low = 0; high = nrow; \ 213 while (high-low > 3) { \ 214 t = (low+high)/2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i=low; _i<high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2*_i + bs*cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto b_noinsert; \ 225 } \ 226 } \ 227 if (b->nonew == 1) goto b_noinsert; \ 228 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 229 if (nrow >= rmax) { \ 230 /* there is no extra room in row, therefore enlarge */ \ 231 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 232 MatScalar *new_a; \ 233 \ 234 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 235 \ 236 /* malloc new storage space */ \ 237 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 238 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 239 new_j = (int*)(new_a + bs2*new_nz); \ 240 new_i = new_j + new_nz; \ 241 \ 242 /* copy over old data into new slots */ \ 243 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 244 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 245 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 246 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 247 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 248 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 249 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 251 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 252 /* free up old matrix storage */ \ 253 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 254 if (!b->singlemalloc) { \ 255 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 256 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 257 } \ 258 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 259 b->singlemalloc = PETSC_TRUE; \ 260 \ 261 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 262 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 263 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 264 b->maxnz += bs2*CHUNKSIZE; \ 265 b->reallocs++; \ 266 b->nz++; \ 267 } \ 268 N = nrow++ - 1; \ 269 /* shift up all the later entries in this row */ \ 270 for (ii=N; ii>=_i; ii--) { \ 271 rp[ii+1] = rp[ii]; \ 272 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 273 } \ 274 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 275 rp[_i] = bcol; \ 276 ap[bs2*_i + bs*cidx + ridx] = value; \ 277 b_noinsert:; \ 278 bilen[brow] = nrow; \ 279 } 280 281 #if defined(PETSC_USE_MAT_SINGLE) 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatSetValues_MPIBAIJ" 284 int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 285 { 286 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 287 int ierr,i,N = m*n; 288 MatScalar *vsingle; 289 290 PetscFunctionBegin; 291 if (N > b->setvalueslen) { 292 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 293 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 294 b->setvalueslen = N; 295 } 296 vsingle = b->setvaluescopy; 297 298 for (i=0; i<N; i++) { 299 vsingle[i] = v[i]; 300 } 301 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 307 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 308 { 309 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 310 int ierr,i,N = m*n*b->bs2; 311 MatScalar *vsingle; 312 313 PetscFunctionBegin; 314 if (N > b->setvalueslen) { 315 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 316 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 317 b->setvalueslen = N; 318 } 319 vsingle = b->setvaluescopy; 320 for (i=0; i<N; i++) { 321 vsingle[i] = v[i]; 322 } 323 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 329 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 330 { 331 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 332 int ierr,i,N = m*n; 333 MatScalar *vsingle; 334 335 PetscFunctionBegin; 336 if (N > b->setvalueslen) { 337 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 338 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 339 b->setvalueslen = N; 340 } 341 vsingle = b->setvaluescopy; 342 for (i=0; i<N; i++) { 343 vsingle[i] = v[i]; 344 } 345 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 346 PetscFunctionReturn(0); 347 } 348 349 #undef __FUNCT__ 350 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 351 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 352 { 353 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 354 int ierr,i,N = m*n*b->bs2; 355 MatScalar *vsingle; 356 357 PetscFunctionBegin; 358 if (N > b->setvalueslen) { 359 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 360 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 361 b->setvalueslen = N; 362 } 363 vsingle = b->setvaluescopy; 364 for (i=0; i<N; i++) { 365 vsingle[i] = v[i]; 366 } 367 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 #endif 371 372 #undef __FUNCT__ 373 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 374 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 375 { 376 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 377 MatScalar value; 378 PetscTruth roworiented = baij->roworiented; 379 int ierr,i,j,row,col; 380 int rstart_orig=baij->rstart_bs; 381 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 382 int cend_orig=baij->cend_bs,bs=baij->bs; 383 384 /* Some Variables required in the macro */ 385 Mat A = baij->A; 386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 387 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 388 MatScalar *aa=a->a; 389 390 Mat B = baij->B; 391 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 392 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 393 MatScalar *ba=b->a; 394 395 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 396 int low,high,t,ridx,cidx,bs2=a->bs2; 397 MatScalar *ap,*bap; 398 399 PetscFunctionBegin; 400 for (i=0; i<m; i++) { 401 if (im[i] < 0) continue; 402 #if defined(PETSC_USE_BOPT_g) 403 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 404 #endif 405 if (im[i] >= rstart_orig && im[i] < rend_orig) { 406 row = im[i] - rstart_orig; 407 for (j=0; j<n; j++) { 408 if (in[j] >= cstart_orig && in[j] < cend_orig){ 409 col = in[j] - cstart_orig; 410 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 411 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 412 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 413 } else if (in[j] < 0) continue; 414 #if defined(PETSC_USE_BOPT_g) 415 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);} 416 #endif 417 else { 418 if (mat->was_assembled) { 419 if (!baij->colmap) { 420 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 421 } 422 #if defined (PETSC_USE_CTABLE) 423 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 424 col = col - 1; 425 #else 426 col = baij->colmap[in[j]/bs] - 1; 427 #endif 428 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 429 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 430 col = in[j]; 431 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 432 B = baij->B; 433 b = (Mat_SeqBAIJ*)(B)->data; 434 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 435 ba=b->a; 436 } else col += in[j]%bs; 437 } else col = in[j]; 438 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 439 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 440 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 441 } 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 458 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 459 { 460 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 461 const MatScalar *value; 462 MatScalar *barray=baij->barray; 463 PetscTruth roworiented = baij->roworiented; 464 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 465 int rend=baij->rend,cstart=baij->cstart,stepval; 466 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 467 468 PetscFunctionBegin; 469 if(!barray) { 470 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 471 baij->barray = barray; 472 } 473 474 if (roworiented) { 475 stepval = (n-1)*bs; 476 } else { 477 stepval = (m-1)*bs; 478 } 479 for (i=0; i<m; i++) { 480 if (im[i] < 0) continue; 481 #if defined(PETSC_USE_BOPT_g) 482 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 483 #endif 484 if (im[i] >= rstart && im[i] < rend) { 485 row = im[i] - rstart; 486 for (j=0; j<n; j++) { 487 /* If NumCol = 1 then a copy is not required */ 488 if ((roworiented) && (n == 1)) { 489 barray = (MatScalar*)v + i*bs2; 490 } else if((!roworiented) && (m == 1)) { 491 barray = (MatScalar*)v + j*bs2; 492 } else { /* Here a copy is required */ 493 if (roworiented) { 494 value = v + i*(stepval+bs)*bs + j*bs; 495 } else { 496 value = v + j*(stepval+bs)*bs + i*bs; 497 } 498 for (ii=0; ii<bs; ii++,value+=stepval) { 499 for (jj=0; jj<bs; jj++) { 500 *barray++ = *value++; 501 } 502 } 503 barray -=bs2; 504 } 505 506 if (in[j] >= cstart && in[j] < cend){ 507 col = in[j] - cstart; 508 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509 } 510 else if (in[j] < 0) continue; 511 #if defined(PETSC_USE_BOPT_g) 512 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 513 #endif 514 else { 515 if (mat->was_assembled) { 516 if (!baij->colmap) { 517 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 518 } 519 520 #if defined(PETSC_USE_BOPT_g) 521 #if defined (PETSC_USE_CTABLE) 522 { int data; 523 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 524 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 525 } 526 #else 527 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 528 #endif 529 #endif 530 #if defined (PETSC_USE_CTABLE) 531 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 532 col = (col - 1)/bs; 533 #else 534 col = (baij->colmap[in[j]] - 1)/bs; 535 #endif 536 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 537 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 538 col = in[j]; 539 } 540 } 541 else col = in[j]; 542 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 543 } 544 } 545 } else { 546 if (!baij->donotstash) { 547 if (roworiented) { 548 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 PetscFunctionReturn(0); 556 } 557 558 #define HASH_KEY 0.6180339887 559 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 560 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 564 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 565 { 566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567 PetscTruth roworiented = baij->roworiented; 568 int ierr,i,j,row,col; 569 int rstart_orig=baij->rstart_bs; 570 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 571 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 572 PetscReal tmp; 573 MatScalar **HD = baij->hd,value; 574 #if defined(PETSC_USE_BOPT_g) 575 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 576 #endif 577 578 PetscFunctionBegin; 579 580 for (i=0; i<m; i++) { 581 #if defined(PETSC_USE_BOPT_g) 582 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 583 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 584 #endif 585 row = im[i]; 586 if (row >= rstart_orig && row < rend_orig) { 587 for (j=0; j<n; j++) { 588 col = in[j]; 589 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 590 /* Look up into the Hash Table */ 591 key = (row/bs)*Nbs+(col/bs)+1; 592 h1 = HASH(size,key,tmp); 593 594 595 idx = h1; 596 #if defined(PETSC_USE_BOPT_g) 597 insert_ct++; 598 total_ct++; 599 if (HT[idx] != key) { 600 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 601 if (idx == size) { 602 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 603 if (idx == h1) { 604 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 605 } 606 } 607 } 608 #else 609 if (HT[idx] != key) { 610 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 611 if (idx == size) { 612 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 613 if (idx == h1) { 614 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 615 } 616 } 617 } 618 #endif 619 /* A HASH table entry is found, so insert the values at the correct address */ 620 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 621 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 622 } 623 } else { 624 if (!baij->donotstash) { 625 if (roworiented) { 626 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 627 } else { 628 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 629 } 630 } 631 } 632 } 633 #if defined(PETSC_USE_BOPT_g) 634 baij->ht_total_ct = total_ct; 635 baij->ht_insert_ct = insert_ct; 636 #endif 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 642 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 643 { 644 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 645 PetscTruth roworiented = baij->roworiented; 646 int ierr,i,j,ii,jj,row,col; 647 int rstart=baij->rstart ; 648 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 649 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 650 PetscReal tmp; 651 MatScalar **HD = baij->hd,*baij_a; 652 const MatScalar *v_t,*value; 653 #if defined(PETSC_USE_BOPT_g) 654 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 655 #endif 656 657 PetscFunctionBegin; 658 659 if (roworiented) { 660 stepval = (n-1)*bs; 661 } else { 662 stepval = (m-1)*bs; 663 } 664 for (i=0; i<m; i++) { 665 #if defined(PETSC_USE_BOPT_g) 666 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]); 667 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1); 668 #endif 669 row = im[i]; 670 v_t = v + i*bs2; 671 if (row >= rstart && row < rend) { 672 for (j=0; j<n; j++) { 673 col = in[j]; 674 675 /* Look up into the Hash Table */ 676 key = row*Nbs+col+1; 677 h1 = HASH(size,key,tmp); 678 679 idx = h1; 680 #if defined(PETSC_USE_BOPT_g) 681 total_ct++; 682 insert_ct++; 683 if (HT[idx] != key) { 684 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 685 if (idx == size) { 686 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 687 if (idx == h1) { 688 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 689 } 690 } 691 } 692 #else 693 if (HT[idx] != key) { 694 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 695 if (idx == size) { 696 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 697 if (idx == h1) { 698 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 699 } 700 } 701 } 702 #endif 703 baij_a = HD[idx]; 704 if (roworiented) { 705 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 706 /* value = v + (i*(stepval+bs)+j)*bs; */ 707 value = v_t; 708 v_t += bs; 709 if (addv == ADD_VALUES) { 710 for (ii=0; ii<bs; ii++,value+=stepval) { 711 for (jj=ii; jj<bs2; jj+=bs) { 712 baij_a[jj] += *value++; 713 } 714 } 715 } else { 716 for (ii=0; ii<bs; ii++,value+=stepval) { 717 for (jj=ii; jj<bs2; jj+=bs) { 718 baij_a[jj] = *value++; 719 } 720 } 721 } 722 } else { 723 value = v + j*(stepval+bs)*bs + i*bs; 724 if (addv == ADD_VALUES) { 725 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 726 for (jj=0; jj<bs; jj++) { 727 baij_a[jj] += *value++; 728 } 729 } 730 } else { 731 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 732 for (jj=0; jj<bs; jj++) { 733 baij_a[jj] = *value++; 734 } 735 } 736 } 737 } 738 } 739 } else { 740 if (!baij->donotstash) { 741 if (roworiented) { 742 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 743 } else { 744 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 745 } 746 } 747 } 748 } 749 #if defined(PETSC_USE_BOPT_g) 750 baij->ht_total_ct = total_ct; 751 baij->ht_insert_ct = insert_ct; 752 #endif 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatGetValues_MPIBAIJ" 758 int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 759 { 760 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 761 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 762 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 763 764 PetscFunctionBegin; 765 for (i=0; i<m; i++) { 766 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 767 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 768 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 769 row = idxm[i] - bsrstart; 770 for (j=0; j<n; j++) { 771 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 772 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 773 if (idxn[j] >= bscstart && idxn[j] < bscend){ 774 col = idxn[j] - bscstart; 775 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776 } else { 777 if (!baij->colmap) { 778 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 779 } 780 #if defined (PETSC_USE_CTABLE) 781 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 782 data --; 783 #else 784 data = baij->colmap[idxn[j]/bs]-1; 785 #endif 786 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 787 else { 788 col = data + idxn[j]%bs; 789 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 790 } 791 } 792 } 793 } else { 794 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 } 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatNorm_MPIBAIJ" 802 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 803 { 804 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 805 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 806 int ierr,i,bs2=baij->bs2; 807 PetscReal sum = 0.0; 808 MatScalar *v; 809 810 PetscFunctionBegin; 811 if (baij->size == 1) { 812 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 813 } else { 814 if (type == NORM_FROBENIUS) { 815 v = amat->a; 816 for (i=0; i<amat->nz*bs2; i++) { 817 #if defined(PETSC_USE_COMPLEX) 818 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 819 #else 820 sum += (*v)*(*v); v++; 821 #endif 822 } 823 v = bmat->a; 824 for (i=0; i<bmat->nz*bs2; i++) { 825 #if defined(PETSC_USE_COMPLEX) 826 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 827 #else 828 sum += (*v)*(*v); v++; 829 #endif 830 } 831 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 832 *nrm = sqrt(*nrm); 833 } else { 834 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 835 } 836 } 837 PetscFunctionReturn(0); 838 } 839 840 841 /* 842 Creates the hash table, and sets the table 843 This table is created only once. 844 If new entried need to be added to the matrix 845 then the hash table has to be destroyed and 846 recreated. 847 */ 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851 { 852 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853 Mat A = baij->A,B=baij->B; 854 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 857 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 858 int *HT,key; 859 MatScalar **HD; 860 PetscReal tmp; 861 #if defined(PETSC_USE_BOPT_g) 862 int ct=0,max=0; 863 #endif 864 865 PetscFunctionBegin; 866 baij->ht_size=(int)(factor*nz); 867 size = baij->ht_size; 868 869 if (baij->ht) { 870 PetscFunctionReturn(0); 871 } 872 873 /* Allocate Memory for Hash Table */ 874 ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 875 baij->ht = (int*)(baij->hd + size); 876 HD = baij->hd; 877 HT = baij->ht; 878 879 880 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr); 881 882 883 /* Loop Over A */ 884 for (i=0; i<a->mbs; i++) { 885 for (j=ai[i]; j<ai[i+1]; j++) { 886 row = i+rstart; 887 col = aj[j]+cstart; 888 889 key = row*Nbs + col + 1; 890 h1 = HASH(size,key,tmp); 891 for (k=0; k<size; k++){ 892 if (HT[(h1+k)%size] == 0.0) { 893 HT[(h1+k)%size] = key; 894 HD[(h1+k)%size] = a->a + j*bs2; 895 break; 896 #if defined(PETSC_USE_BOPT_g) 897 } else { 898 ct++; 899 #endif 900 } 901 } 902 #if defined(PETSC_USE_BOPT_g) 903 if (k> max) max = k; 904 #endif 905 } 906 } 907 /* Loop Over B */ 908 for (i=0; i<b->mbs; i++) { 909 for (j=bi[i]; j<bi[i+1]; j++) { 910 row = i+rstart; 911 col = garray[bj[j]]; 912 key = row*Nbs + col + 1; 913 h1 = HASH(size,key,tmp); 914 for (k=0; k<size; k++){ 915 if (HT[(h1+k)%size] == 0.0) { 916 HT[(h1+k)%size] = key; 917 HD[(h1+k)%size] = b->a + j*bs2; 918 break; 919 #if defined(PETSC_USE_BOPT_g) 920 } else { 921 ct++; 922 #endif 923 } 924 } 925 #if defined(PETSC_USE_BOPT_g) 926 if (k> max) max = k; 927 #endif 928 } 929 } 930 931 /* Print Summary */ 932 #if defined(PETSC_USE_BOPT_g) 933 for (i=0,j=0; i<size; i++) { 934 if (HT[i]) {j++;} 935 } 936 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 937 #endif 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNCT__ 942 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 943 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 944 { 945 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 946 int ierr,nstash,reallocs; 947 InsertMode addv; 948 949 PetscFunctionBegin; 950 if (baij->donotstash) { 951 PetscFunctionReturn(0); 952 } 953 954 /* make sure all processors are either in INSERTMODE or ADDMODE */ 955 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 956 if (addv == (ADD_VALUES|INSERT_VALUES)) { 957 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 958 } 959 mat->insertmode = addv; /* in case this processor had no cache */ 960 961 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 962 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 963 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 964 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 965 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 966 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 967 PetscFunctionReturn(0); 968 } 969 970 #undef __FUNCT__ 971 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 972 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 973 { 974 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 975 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 976 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 977 int *row,*col,other_disassembled; 978 PetscTruth r1,r2,r3; 979 MatScalar *val; 980 InsertMode addv = mat->insertmode; 981 982 PetscFunctionBegin; 983 if (!baij->donotstash) { 984 while (1) { 985 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 986 if (!flg) break; 987 988 for (i=0; i<n;) { 989 /* Now identify the consecutive vals belonging to the same row */ 990 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 991 if (j < n) ncols = j-i; 992 else ncols = n-i; 993 /* Now assemble all these values with a single function call */ 994 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 995 i = j; 996 } 997 } 998 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 999 /* Now process the block-stash. Since the values are stashed column-oriented, 1000 set the roworiented flag to column oriented, and after MatSetValues() 1001 restore the original flags */ 1002 r1 = baij->roworiented; 1003 r2 = a->roworiented; 1004 r3 = b->roworiented; 1005 baij->roworiented = PETSC_FALSE; 1006 a->roworiented = PETSC_FALSE; 1007 b->roworiented = PETSC_FALSE; 1008 while (1) { 1009 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1010 if (!flg) break; 1011 1012 for (i=0; i<n;) { 1013 /* Now identify the consecutive vals belonging to the same row */ 1014 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1015 if (j < n) ncols = j-i; 1016 else ncols = n-i; 1017 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1018 i = j; 1019 } 1020 } 1021 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1022 baij->roworiented = r1; 1023 a->roworiented = r2; 1024 b->roworiented = r3; 1025 } 1026 1027 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1028 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1029 1030 /* determine if any processor has disassembled, if so we must 1031 also disassemble ourselfs, in order that we may reassemble. */ 1032 /* 1033 if nonzero structure of submatrix B cannot change then we know that 1034 no processor disassembled thus we can skip this stuff 1035 */ 1036 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1037 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1038 if (mat->was_assembled && !other_disassembled) { 1039 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1040 } 1041 } 1042 1043 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1044 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1045 } 1046 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1047 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1048 1049 #if defined(PETSC_USE_BOPT_g) 1050 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1051 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1052 baij->ht_total_ct = 0; 1053 baij->ht_insert_ct = 0; 1054 } 1055 #endif 1056 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1057 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1058 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1059 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1060 } 1061 1062 if (baij->rowvalues) { 1063 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1064 baij->rowvalues = 0; 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1071 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1072 { 1073 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1074 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 1075 PetscTruth isascii,isdraw; 1076 PetscViewer sviewer; 1077 PetscViewerFormat format; 1078 1079 PetscFunctionBegin; 1080 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1081 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1082 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1083 if (isascii) { 1084 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1085 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1086 MatInfo info; 1087 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1088 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1089 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1090 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1091 baij->bs,(int)info.memory);CHKERRQ(ierr); 1092 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1093 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1094 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1095 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1096 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1097 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1100 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1103 PetscFunctionReturn(0); 1104 } 1105 } 1106 1107 if (isdraw) { 1108 PetscDraw draw; 1109 PetscTruth isnull; 1110 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1111 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1112 } 1113 1114 if (size == 1) { 1115 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1116 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1117 } else { 1118 /* assemble the entire matrix onto first processor. */ 1119 Mat A; 1120 Mat_SeqBAIJ *Aloc; 1121 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1122 MatScalar *a; 1123 1124 if (!rank) { 1125 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1126 } else { 1127 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1128 } 1129 PetscLogObjectParent(mat,A); 1130 1131 /* copy over the A part */ 1132 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1133 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1134 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1135 1136 for (i=0; i<mbs; i++) { 1137 rvals[0] = bs*(baij->rstart + i); 1138 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1139 for (j=ai[i]; j<ai[i+1]; j++) { 1140 col = (baij->cstart+aj[j])*bs; 1141 for (k=0; k<bs; k++) { 1142 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1143 col++; a += bs; 1144 } 1145 } 1146 } 1147 /* copy over the B part */ 1148 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1149 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1150 for (i=0; i<mbs; i++) { 1151 rvals[0] = bs*(baij->rstart + i); 1152 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1153 for (j=ai[i]; j<ai[i+1]; j++) { 1154 col = baij->garray[aj[j]]*bs; 1155 for (k=0; k<bs; k++) { 1156 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1157 col++; a += bs; 1158 } 1159 } 1160 } 1161 ierr = PetscFree(rvals);CHKERRQ(ierr); 1162 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1163 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1164 /* 1165 Everyone has to call to draw the matrix since the graphics waits are 1166 synchronized across all processors that share the PetscDraw object 1167 */ 1168 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169 if (!rank) { 1170 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1171 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1172 } 1173 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1174 ierr = MatDestroy(A);CHKERRQ(ierr); 1175 } 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatView_MPIBAIJ" 1181 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1182 { 1183 int ierr; 1184 PetscTruth isascii,isdraw,issocket,isbinary; 1185 1186 PetscFunctionBegin; 1187 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1188 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1189 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1190 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1191 if (isascii || isdraw || issocket || isbinary) { 1192 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1193 } else { 1194 SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1195 } 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1201 int MatDestroy_MPIBAIJ(Mat mat) 1202 { 1203 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1204 int ierr; 1205 1206 PetscFunctionBegin; 1207 #if defined(PETSC_USE_LOG) 1208 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 1209 #endif 1210 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1211 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1212 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1213 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1214 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1215 #if defined (PETSC_USE_CTABLE) 1216 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1217 #else 1218 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1219 #endif 1220 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1221 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1222 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1223 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1224 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1225 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1226 #if defined(PETSC_USE_MAT_SINGLE) 1227 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1228 #endif 1229 ierr = PetscFree(baij);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatMult_MPIBAIJ" 1235 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1236 { 1237 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1238 int ierr,nt; 1239 1240 PetscFunctionBegin; 1241 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1242 if (nt != A->n) { 1243 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1244 } 1245 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1246 if (nt != A->m) { 1247 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1248 } 1249 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1250 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1251 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1252 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1253 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1259 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1260 { 1261 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1262 int ierr; 1263 1264 PetscFunctionBegin; 1265 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1266 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1268 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1274 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1275 { 1276 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1277 int ierr; 1278 1279 PetscFunctionBegin; 1280 /* do nondiagonal part */ 1281 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1282 /* send it on its way */ 1283 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1284 /* do local part */ 1285 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1286 /* receive remote parts: note this assumes the values are not actually */ 1287 /* inserted in yy until the next line, which is true for my implementation*/ 1288 /* but is not perhaps always true. */ 1289 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1295 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1296 { 1297 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1298 int ierr; 1299 1300 PetscFunctionBegin; 1301 /* do nondiagonal part */ 1302 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1303 /* send it on its way */ 1304 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1305 /* do local part */ 1306 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1307 /* receive remote parts: note this assumes the values are not actually */ 1308 /* inserted in yy until the next line, which is true for my implementation*/ 1309 /* but is not perhaps always true. */ 1310 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 /* 1315 This only works correctly for square matrices where the subblock A->A is the 1316 diagonal block 1317 */ 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1320 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1321 { 1322 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1323 int ierr; 1324 1325 PetscFunctionBegin; 1326 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1327 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatScale_MPIBAIJ" 1333 int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1334 { 1335 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1336 int ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1340 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1346 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1347 { 1348 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1349 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1350 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1351 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1352 int *cmap,*idx_p,cstart = mat->cstart; 1353 1354 PetscFunctionBegin; 1355 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1356 mat->getrowactive = PETSC_TRUE; 1357 1358 if (!mat->rowvalues && (idx || v)) { 1359 /* 1360 allocate enough space to hold information from the longest row. 1361 */ 1362 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1363 int max = 1,mbs = mat->mbs,tmp; 1364 for (i=0; i<mbs; i++) { 1365 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1366 if (max < tmp) { max = tmp; } 1367 } 1368 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1369 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1370 } 1371 1372 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1373 lrow = row - brstart; 1374 1375 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1376 if (!v) {pvA = 0; pvB = 0;} 1377 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1378 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1379 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1380 nztot = nzA + nzB; 1381 1382 cmap = mat->garray; 1383 if (v || idx) { 1384 if (nztot) { 1385 /* Sort by increasing column numbers, assuming A and B already sorted */ 1386 int imark = -1; 1387 if (v) { 1388 *v = v_p = mat->rowvalues; 1389 for (i=0; i<nzB; i++) { 1390 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1391 else break; 1392 } 1393 imark = i; 1394 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1395 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1396 } 1397 if (idx) { 1398 *idx = idx_p = mat->rowindices; 1399 if (imark > -1) { 1400 for (i=0; i<imark; i++) { 1401 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1402 } 1403 } else { 1404 for (i=0; i<nzB; i++) { 1405 if (cmap[cworkB[i]/bs] < cstart) 1406 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1407 else break; 1408 } 1409 imark = i; 1410 } 1411 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1412 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1413 } 1414 } else { 1415 if (idx) *idx = 0; 1416 if (v) *v = 0; 1417 } 1418 } 1419 *nz = nztot; 1420 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1421 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1427 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1428 { 1429 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1430 1431 PetscFunctionBegin; 1432 if (baij->getrowactive == PETSC_FALSE) { 1433 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1434 } 1435 baij->getrowactive = PETSC_FALSE; 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1441 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1442 { 1443 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1444 1445 PetscFunctionBegin; 1446 *bs = baij->bs; 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1452 int MatZeroEntries_MPIBAIJ(Mat A) 1453 { 1454 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1455 int ierr; 1456 1457 PetscFunctionBegin; 1458 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1459 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1465 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1466 { 1467 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1468 Mat A = a->A,B = a->B; 1469 int ierr; 1470 PetscReal isend[5],irecv[5]; 1471 1472 PetscFunctionBegin; 1473 info->block_size = (PetscReal)a->bs; 1474 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1475 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1476 isend[3] = info->memory; isend[4] = info->mallocs; 1477 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1478 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1479 isend[3] += info->memory; isend[4] += info->mallocs; 1480 if (flag == MAT_LOCAL) { 1481 info->nz_used = isend[0]; 1482 info->nz_allocated = isend[1]; 1483 info->nz_unneeded = isend[2]; 1484 info->memory = isend[3]; 1485 info->mallocs = isend[4]; 1486 } else if (flag == MAT_GLOBAL_MAX) { 1487 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1488 info->nz_used = irecv[0]; 1489 info->nz_allocated = irecv[1]; 1490 info->nz_unneeded = irecv[2]; 1491 info->memory = irecv[3]; 1492 info->mallocs = irecv[4]; 1493 } else if (flag == MAT_GLOBAL_SUM) { 1494 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1495 info->nz_used = irecv[0]; 1496 info->nz_allocated = irecv[1]; 1497 info->nz_unneeded = irecv[2]; 1498 info->memory = irecv[3]; 1499 info->mallocs = irecv[4]; 1500 } else { 1501 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1502 } 1503 info->rows_global = (PetscReal)A->M; 1504 info->columns_global = (PetscReal)A->N; 1505 info->rows_local = (PetscReal)A->m; 1506 info->columns_local = (PetscReal)A->N; 1507 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1508 info->fill_ratio_needed = 0; 1509 info->factor_mallocs = 0; 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1515 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1516 { 1517 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1518 int ierr; 1519 1520 PetscFunctionBegin; 1521 switch (op) { 1522 case MAT_NO_NEW_NONZERO_LOCATIONS: 1523 case MAT_YES_NEW_NONZERO_LOCATIONS: 1524 case MAT_COLUMNS_UNSORTED: 1525 case MAT_COLUMNS_SORTED: 1526 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1527 case MAT_KEEP_ZEROED_ROWS: 1528 case MAT_NEW_NONZERO_LOCATION_ERR: 1529 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1530 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1531 break; 1532 case MAT_ROW_ORIENTED: 1533 a->roworiented = PETSC_TRUE; 1534 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1535 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1536 break; 1537 case MAT_ROWS_SORTED: 1538 case MAT_ROWS_UNSORTED: 1539 case MAT_YES_NEW_DIAGONALS: 1540 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1541 break; 1542 case MAT_COLUMN_ORIENTED: 1543 a->roworiented = PETSC_FALSE; 1544 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1545 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1546 break; 1547 case MAT_IGNORE_OFF_PROC_ENTRIES: 1548 a->donotstash = PETSC_TRUE; 1549 break; 1550 case MAT_NO_NEW_DIAGONALS: 1551 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1552 case MAT_USE_HASH_TABLE: 1553 a->ht_flag = PETSC_TRUE; 1554 break; 1555 case MAT_SYMMETRIC: 1556 case MAT_STRUCTURALLY_SYMMETRIC: 1557 case MAT_NOT_SYMMETRIC: 1558 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1559 case MAT_HERMITIAN: 1560 case MAT_NOT_HERMITIAN: 1561 case MAT_SYMMETRY_ETERNAL: 1562 case MAT_NOT_SYMMETRY_ETERNAL: 1563 break; 1564 default: 1565 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1572 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1573 { 1574 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1575 Mat_SeqBAIJ *Aloc; 1576 Mat B; 1577 int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1578 int bs=baij->bs,mbs=baij->mbs; 1579 MatScalar *a; 1580 1581 PetscFunctionBegin; 1582 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1583 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1584 1585 /* copy over the A part */ 1586 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1587 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1588 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1589 1590 for (i=0; i<mbs; i++) { 1591 rvals[0] = bs*(baij->rstart + i); 1592 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1593 for (j=ai[i]; j<ai[i+1]; j++) { 1594 col = (baij->cstart+aj[j])*bs; 1595 for (k=0; k<bs; k++) { 1596 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1597 col++; a += bs; 1598 } 1599 } 1600 } 1601 /* copy over the B part */ 1602 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1603 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1604 for (i=0; i<mbs; i++) { 1605 rvals[0] = bs*(baij->rstart + i); 1606 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1607 for (j=ai[i]; j<ai[i+1]; j++) { 1608 col = baij->garray[aj[j]]*bs; 1609 for (k=0; k<bs; k++) { 1610 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1611 col++; a += bs; 1612 } 1613 } 1614 } 1615 ierr = PetscFree(rvals);CHKERRQ(ierr); 1616 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1617 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1618 1619 if (matout) { 1620 *matout = B; 1621 } else { 1622 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626 1627 #undef __FUNCT__ 1628 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1629 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1630 { 1631 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1632 Mat a = baij->A,b = baij->B; 1633 int ierr,s1,s2,s3; 1634 1635 PetscFunctionBegin; 1636 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1637 if (rr) { 1638 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1639 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1640 /* Overlap communication with computation. */ 1641 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1642 } 1643 if (ll) { 1644 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1645 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1646 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1647 } 1648 /* scale the diagonal block */ 1649 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1650 1651 if (rr) { 1652 /* Do a scatter end and then right scale the off-diagonal block */ 1653 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1654 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1655 } 1656 1657 PetscFunctionReturn(0); 1658 } 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1662 int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1663 { 1664 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1665 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1666 int *nprocs,j,idx,nsends,row; 1667 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1668 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1669 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1670 MPI_Comm comm = A->comm; 1671 MPI_Request *send_waits,*recv_waits; 1672 MPI_Status recv_status,*send_status; 1673 IS istmp; 1674 PetscTruth found; 1675 1676 PetscFunctionBegin; 1677 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1678 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1679 1680 /* first count number of contributors to each processor */ 1681 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1682 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1683 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1684 for (i=0; i<N; i++) { 1685 idx = rows[i]; 1686 found = PETSC_FALSE; 1687 for (j=0; j<size; j++) { 1688 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1689 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1690 } 1691 } 1692 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1693 } 1694 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1695 1696 /* inform other processors of number of messages and max length*/ 1697 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1698 1699 /* post receives: */ 1700 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1701 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1702 for (i=0; i<nrecvs; i++) { 1703 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1704 } 1705 1706 /* do sends: 1707 1) starts[i] gives the starting index in svalues for stuff going to 1708 the ith processor 1709 */ 1710 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1711 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1712 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1713 starts[0] = 0; 1714 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1715 for (i=0; i<N; i++) { 1716 svalues[starts[owner[i]]++] = rows[i]; 1717 } 1718 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1719 1720 starts[0] = 0; 1721 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1722 count = 0; 1723 for (i=0; i<size; i++) { 1724 if (nprocs[2*i+1]) { 1725 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1726 } 1727 } 1728 ierr = PetscFree(starts);CHKERRQ(ierr); 1729 1730 base = owners[rank]*bs; 1731 1732 /* wait on receives */ 1733 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1734 source = lens + nrecvs; 1735 count = nrecvs; slen = 0; 1736 while (count) { 1737 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1738 /* unpack receives into our local space */ 1739 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1740 source[imdex] = recv_status.MPI_SOURCE; 1741 lens[imdex] = n; 1742 slen += n; 1743 count--; 1744 } 1745 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1746 1747 /* move the data into the send scatter */ 1748 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1749 count = 0; 1750 for (i=0; i<nrecvs; i++) { 1751 values = rvalues + i*nmax; 1752 for (j=0; j<lens[i]; j++) { 1753 lrows[count++] = values[j] - base; 1754 } 1755 } 1756 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1757 ierr = PetscFree(lens);CHKERRQ(ierr); 1758 ierr = PetscFree(owner);CHKERRQ(ierr); 1759 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1760 1761 /* actually zap the local rows */ 1762 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1763 PetscLogObjectParent(A,istmp); 1764 1765 /* 1766 Zero the required rows. If the "diagonal block" of the matrix 1767 is square and the user wishes to set the diagonal we use seperate 1768 code so that MatSetValues() is not called for each diagonal allocating 1769 new memory, thus calling lots of mallocs and slowing things down. 1770 1771 Contributed by: Mathew Knepley 1772 */ 1773 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1774 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1775 if (diag && (l->A->M == l->A->N)) { 1776 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1777 } else if (diag) { 1778 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1779 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1780 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1781 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1782 } 1783 for (i=0; i<slen; i++) { 1784 row = lrows[i] + rstart_bs; 1785 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1786 } 1787 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1788 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1789 } else { 1790 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1791 } 1792 1793 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1794 ierr = PetscFree(lrows);CHKERRQ(ierr); 1795 1796 /* wait on sends */ 1797 if (nsends) { 1798 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1799 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1800 ierr = PetscFree(send_status);CHKERRQ(ierr); 1801 } 1802 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1803 ierr = PetscFree(svalues);CHKERRQ(ierr); 1804 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1810 int MatPrintHelp_MPIBAIJ(Mat A) 1811 { 1812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1813 MPI_Comm comm = A->comm; 1814 static int called = 0; 1815 int ierr; 1816 1817 PetscFunctionBegin; 1818 if (!a->rank) { 1819 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1820 } 1821 if (called) {PetscFunctionReturn(0);} else called = 1; 1822 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1823 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1824 PetscFunctionReturn(0); 1825 } 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1829 int MatSetUnfactored_MPIBAIJ(Mat A) 1830 { 1831 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1832 int ierr; 1833 1834 PetscFunctionBegin; 1835 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1836 PetscFunctionReturn(0); 1837 } 1838 1839 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatEqual_MPIBAIJ" 1843 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1844 { 1845 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1846 Mat a,b,c,d; 1847 PetscTruth flg; 1848 int ierr; 1849 1850 PetscFunctionBegin; 1851 a = matA->A; b = matA->B; 1852 c = matB->A; d = matB->B; 1853 1854 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1855 if (flg == PETSC_TRUE) { 1856 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1857 } 1858 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1865 int MatSetUpPreallocation_MPIBAIJ(Mat A) 1866 { 1867 int ierr; 1868 1869 PetscFunctionBegin; 1870 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 /* -------------------------------------------------------------------*/ 1875 static struct _MatOps MatOps_Values = { 1876 MatSetValues_MPIBAIJ, 1877 MatGetRow_MPIBAIJ, 1878 MatRestoreRow_MPIBAIJ, 1879 MatMult_MPIBAIJ, 1880 /* 4*/ MatMultAdd_MPIBAIJ, 1881 MatMultTranspose_MPIBAIJ, 1882 MatMultTransposeAdd_MPIBAIJ, 1883 0, 1884 0, 1885 0, 1886 /*10*/ 0, 1887 0, 1888 0, 1889 0, 1890 MatTranspose_MPIBAIJ, 1891 /*15*/ MatGetInfo_MPIBAIJ, 1892 MatEqual_MPIBAIJ, 1893 MatGetDiagonal_MPIBAIJ, 1894 MatDiagonalScale_MPIBAIJ, 1895 MatNorm_MPIBAIJ, 1896 /*20*/ MatAssemblyBegin_MPIBAIJ, 1897 MatAssemblyEnd_MPIBAIJ, 1898 0, 1899 MatSetOption_MPIBAIJ, 1900 MatZeroEntries_MPIBAIJ, 1901 /*25*/ MatZeroRows_MPIBAIJ, 1902 0, 1903 0, 1904 0, 1905 0, 1906 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1907 0, 1908 0, 1909 0, 1910 0, 1911 /*35*/ MatDuplicate_MPIBAIJ, 1912 0, 1913 0, 1914 0, 1915 0, 1916 /*40*/ 0, 1917 MatGetSubMatrices_MPIBAIJ, 1918 MatIncreaseOverlap_MPIBAIJ, 1919 MatGetValues_MPIBAIJ, 1920 0, 1921 /*45*/ MatPrintHelp_MPIBAIJ, 1922 MatScale_MPIBAIJ, 1923 0, 1924 0, 1925 0, 1926 /*50*/ MatGetBlockSize_MPIBAIJ, 1927 0, 1928 0, 1929 0, 1930 0, 1931 /*55*/ 0, 1932 0, 1933 MatSetUnfactored_MPIBAIJ, 1934 0, 1935 MatSetValuesBlocked_MPIBAIJ, 1936 /*60*/ 0, 1937 MatDestroy_MPIBAIJ, 1938 MatView_MPIBAIJ, 1939 MatGetPetscMaps_Petsc, 1940 0, 1941 /*65*/ 0, 1942 0, 1943 0, 1944 0, 1945 0, 1946 /*70*/ MatGetRowMax_MPIBAIJ, 1947 0, 1948 0, 1949 0, 1950 0, 1951 /*75*/ 0, 1952 0, 1953 0, 1954 0, 1955 0, 1956 /*80*/ 0, 1957 0, 1958 0, 1959 0, 1960 /*85*/ MatLoad_MPIBAIJ 1961 }; 1962 1963 1964 EXTERN_C_BEGIN 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1967 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1968 { 1969 PetscFunctionBegin; 1970 *a = ((Mat_MPIBAIJ *)A->data)->A; 1971 *iscopy = PETSC_FALSE; 1972 PetscFunctionReturn(0); 1973 } 1974 EXTERN_C_END 1975 1976 EXTERN_C_BEGIN 1977 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 1978 EXTERN_C_END 1979 1980 EXTERN_C_BEGIN 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 1983 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1984 { 1985 Mat_MPIBAIJ *b; 1986 int ierr,i; 1987 1988 PetscFunctionBegin; 1989 B->preallocated = PETSC_TRUE; 1990 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1991 1992 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1993 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1994 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1995 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1996 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1997 if (d_nnz) { 1998 for (i=0; i<B->m/bs; i++) { 1999 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2000 } 2001 } 2002 if (o_nnz) { 2003 for (i=0; i<B->m/bs; i++) { 2004 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2005 } 2006 } 2007 2008 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2009 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2010 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2011 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2012 2013 b = (Mat_MPIBAIJ*)B->data; 2014 b->bs = bs; 2015 b->bs2 = bs*bs; 2016 b->mbs = B->m/bs; 2017 b->nbs = B->n/bs; 2018 b->Mbs = B->M/bs; 2019 b->Nbs = B->N/bs; 2020 2021 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2022 b->rowners[0] = 0; 2023 for (i=2; i<=b->size; i++) { 2024 b->rowners[i] += b->rowners[i-1]; 2025 } 2026 b->rstart = b->rowners[b->rank]; 2027 b->rend = b->rowners[b->rank+1]; 2028 2029 ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2030 b->cowners[0] = 0; 2031 for (i=2; i<=b->size; i++) { 2032 b->cowners[i] += b->cowners[i-1]; 2033 } 2034 b->cstart = b->cowners[b->rank]; 2035 b->cend = b->cowners[b->rank+1]; 2036 2037 for (i=0; i<=b->size; i++) { 2038 b->rowners_bs[i] = b->rowners[i]*bs; 2039 } 2040 b->rstart_bs = b->rstart*bs; 2041 b->rend_bs = b->rend*bs; 2042 b->cstart_bs = b->cstart*bs; 2043 b->cend_bs = b->cend*bs; 2044 2045 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2046 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2047 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2048 PetscLogObjectParent(B,b->A); 2049 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2050 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2051 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2052 PetscLogObjectParent(B,b->B); 2053 2054 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2055 2056 PetscFunctionReturn(0); 2057 } 2058 EXTERN_C_END 2059 2060 EXTERN_C_BEGIN 2061 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2062 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2063 EXTERN_C_END 2064 2065 /*MC 2066 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2067 2068 Options Database Keys: 2069 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2070 2071 Level: beginner 2072 2073 .seealso: MatCreateMPIBAIJ 2074 M*/ 2075 2076 EXTERN_C_BEGIN 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatCreate_MPIBAIJ" 2079 int MatCreate_MPIBAIJ(Mat B) 2080 { 2081 Mat_MPIBAIJ *b; 2082 int ierr; 2083 PetscTruth flg; 2084 2085 PetscFunctionBegin; 2086 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2087 B->data = (void*)b; 2088 2089 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2090 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2091 B->mapping = 0; 2092 B->factor = 0; 2093 B->assembled = PETSC_FALSE; 2094 2095 B->insertmode = NOT_SET_VALUES; 2096 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2097 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2098 2099 /* build local table of row and column ownerships */ 2100 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2101 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2102 b->cowners = b->rowners + b->size + 2; 2103 b->rowners_bs = b->cowners + b->size + 2; 2104 2105 /* build cache for off array entries formed */ 2106 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2107 b->donotstash = PETSC_FALSE; 2108 b->colmap = PETSC_NULL; 2109 b->garray = PETSC_NULL; 2110 b->roworiented = PETSC_TRUE; 2111 2112 #if defined(PETSC_USE_MAT_SINGLE) 2113 /* stuff for MatSetValues_XXX in single precision */ 2114 b->setvalueslen = 0; 2115 b->setvaluescopy = PETSC_NULL; 2116 #endif 2117 2118 /* stuff used in block assembly */ 2119 b->barray = 0; 2120 2121 /* stuff used for matrix vector multiply */ 2122 b->lvec = 0; 2123 b->Mvctx = 0; 2124 2125 /* stuff for MatGetRow() */ 2126 b->rowindices = 0; 2127 b->rowvalues = 0; 2128 b->getrowactive = PETSC_FALSE; 2129 2130 /* hash table stuff */ 2131 b->ht = 0; 2132 b->hd = 0; 2133 b->ht_size = 0; 2134 b->ht_flag = PETSC_FALSE; 2135 b->ht_fact = 0; 2136 b->ht_total_ct = 0; 2137 b->ht_insert_ct = 0; 2138 2139 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 PetscReal fact = 1.39; 2142 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2143 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2144 if (fact <= 1.0) fact = 1.39; 2145 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2146 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2147 } 2148 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2149 "MatStoreValues_MPIBAIJ", 2150 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2151 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2152 "MatRetrieveValues_MPIBAIJ", 2153 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2154 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2155 "MatGetDiagonalBlock_MPIBAIJ", 2156 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2157 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2158 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2159 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2160 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2161 "MatDiagonalScaleLocal_MPIBAIJ", 2162 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2163 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2164 "MatSetHashTableFactor_MPIBAIJ", 2165 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 EXTERN_C_END 2169 2170 /*MC 2171 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2172 2173 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2174 and MATMPIBAIJ otherwise. 2175 2176 Options Database Keys: 2177 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2178 2179 Level: beginner 2180 2181 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ 2182 M*/ 2183 2184 EXTERN_C_BEGIN 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatCreate_BAIJ" 2187 int MatCreate_BAIJ(Mat A) { 2188 int ierr,size; 2189 2190 PetscFunctionBegin; 2191 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2192 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2193 if (size == 1) { 2194 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2195 } else { 2196 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 EXTERN_C_END 2201 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2204 /*@C 2205 MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format 2206 (block compressed row). For good matrix assembly performance 2207 the user should preallocate the matrix storage by setting the parameters 2208 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2209 performance can be increased by more than a factor of 50. 2210 2211 Collective on Mat 2212 2213 Input Parameters: 2214 + A - the matrix 2215 . bs - size of blockk 2216 . d_nz - number of block nonzeros per block row in diagonal portion of local 2217 submatrix (same for all local rows) 2218 . d_nnz - array containing the number of block nonzeros in the various block rows 2219 of the in diagonal portion of the local (possibly different for each block 2220 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2221 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2222 submatrix (same for all local rows). 2223 - o_nnz - array containing the number of nonzeros in the various block rows of the 2224 off-diagonal portion of the local submatrix (possibly different for 2225 each block row) or PETSC_NULL. 2226 2227 Output Parameter: 2228 2229 2230 Options Database Keys: 2231 . -mat_no_unroll - uses code that does not unroll the loops in the 2232 block calculations (much slower) 2233 . -mat_block_size - size of the blocks to use 2234 2235 Notes: 2236 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2237 than it must be used on all processors that share the object for that argument. 2238 2239 Storage Information: 2240 For a square global matrix we define each processor's diagonal portion 2241 to be its local rows and the corresponding columns (a square submatrix); 2242 each processor's off-diagonal portion encompasses the remainder of the 2243 local matrix (a rectangular submatrix). 2244 2245 The user can specify preallocated storage for the diagonal part of 2246 the local submatrix with either d_nz or d_nnz (not both). Set 2247 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2248 memory allocation. Likewise, specify preallocated storage for the 2249 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2250 2251 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2252 the figure below we depict these three local rows and all columns (0-11). 2253 2254 .vb 2255 0 1 2 3 4 5 6 7 8 9 10 11 2256 ------------------- 2257 row 3 | o o o d d d o o o o o o 2258 row 4 | o o o d d d o o o o o o 2259 row 5 | o o o d d d o o o o o o 2260 ------------------- 2261 .ve 2262 2263 Thus, any entries in the d locations are stored in the d (diagonal) 2264 submatrix, and any entries in the o locations are stored in the 2265 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2266 stored simply in the MATSEQBAIJ format for compressed row storage. 2267 2268 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2269 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2270 In general, for PDE problems in which most nonzeros are near the diagonal, 2271 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2272 or you will get TERRIBLE performance; see the users' manual chapter on 2273 matrices. 2274 2275 Level: intermediate 2276 2277 .keywords: matrix, block, aij, compressed row, sparse, parallel 2278 2279 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2280 @*/ 2281 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2282 { 2283 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 2284 2285 PetscFunctionBegin; 2286 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2287 if (f) { 2288 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatCreateMPIBAIJ" 2295 /*@C 2296 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2297 (block compressed row). For good matrix assembly performance 2298 the user should preallocate the matrix storage by setting the parameters 2299 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2300 performance can be increased by more than a factor of 50. 2301 2302 Collective on MPI_Comm 2303 2304 Input Parameters: 2305 + comm - MPI communicator 2306 . bs - size of blockk 2307 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2308 This value should be the same as the local size used in creating the 2309 y vector for the matrix-vector product y = Ax. 2310 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2311 This value should be the same as the local size used in creating the 2312 x vector for the matrix-vector product y = Ax. 2313 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2314 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2315 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2316 submatrix (same for all local rows) 2317 . d_nnz - array containing the number of nonzero blocks in the various block rows 2318 of the in diagonal portion of the local (possibly different for each block 2319 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2320 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2321 submatrix (same for all local rows). 2322 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2323 off-diagonal portion of the local submatrix (possibly different for 2324 each block row) or PETSC_NULL. 2325 2326 Output Parameter: 2327 . A - the matrix 2328 2329 Options Database Keys: 2330 . -mat_no_unroll - uses code that does not unroll the loops in the 2331 block calculations (much slower) 2332 . -mat_block_size - size of the blocks to use 2333 2334 Notes: 2335 A nonzero block is any block that as 1 or more nonzeros in it 2336 2337 The user MUST specify either the local or global matrix dimensions 2338 (possibly both). 2339 2340 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2341 than it must be used on all processors that share the object for that argument. 2342 2343 Storage Information: 2344 For a square global matrix we define each processor's diagonal portion 2345 to be its local rows and the corresponding columns (a square submatrix); 2346 each processor's off-diagonal portion encompasses the remainder of the 2347 local matrix (a rectangular submatrix). 2348 2349 The user can specify preallocated storage for the diagonal part of 2350 the local submatrix with either d_nz or d_nnz (not both). Set 2351 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2352 memory allocation. Likewise, specify preallocated storage for the 2353 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2354 2355 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2356 the figure below we depict these three local rows and all columns (0-11). 2357 2358 .vb 2359 0 1 2 3 4 5 6 7 8 9 10 11 2360 ------------------- 2361 row 3 | o o o d d d o o o o o o 2362 row 4 | o o o d d d o o o o o o 2363 row 5 | o o o d d d o o o o o o 2364 ------------------- 2365 .ve 2366 2367 Thus, any entries in the d locations are stored in the d (diagonal) 2368 submatrix, and any entries in the o locations are stored in the 2369 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2370 stored simply in the MATSEQBAIJ format for compressed row storage. 2371 2372 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2373 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2374 In general, for PDE problems in which most nonzeros are near the diagonal, 2375 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2376 or you will get TERRIBLE performance; see the users' manual chapter on 2377 matrices. 2378 2379 Level: intermediate 2380 2381 .keywords: matrix, block, aij, compressed row, sparse, parallel 2382 2383 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2384 @*/ 2385 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2386 { 2387 int ierr,size; 2388 2389 PetscFunctionBegin; 2390 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2391 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2392 if (size > 1) { 2393 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2394 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2395 } else { 2396 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2397 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2398 } 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2404 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2405 { 2406 Mat mat; 2407 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2408 int ierr,len=0; 2409 2410 PetscFunctionBegin; 2411 *newmat = 0; 2412 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2413 ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr); 2414 2415 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2416 mat->factor = matin->factor; 2417 mat->preallocated = PETSC_TRUE; 2418 mat->assembled = PETSC_TRUE; 2419 mat->insertmode = NOT_SET_VALUES; 2420 2421 a = (Mat_MPIBAIJ*)mat->data; 2422 a->bs = oldmat->bs; 2423 a->bs2 = oldmat->bs2; 2424 a->mbs = oldmat->mbs; 2425 a->nbs = oldmat->nbs; 2426 a->Mbs = oldmat->Mbs; 2427 a->Nbs = oldmat->Nbs; 2428 2429 a->rstart = oldmat->rstart; 2430 a->rend = oldmat->rend; 2431 a->cstart = oldmat->cstart; 2432 a->cend = oldmat->cend; 2433 a->size = oldmat->size; 2434 a->rank = oldmat->rank; 2435 a->donotstash = oldmat->donotstash; 2436 a->roworiented = oldmat->roworiented; 2437 a->rowindices = 0; 2438 a->rowvalues = 0; 2439 a->getrowactive = PETSC_FALSE; 2440 a->barray = 0; 2441 a->rstart_bs = oldmat->rstart_bs; 2442 a->rend_bs = oldmat->rend_bs; 2443 a->cstart_bs = oldmat->cstart_bs; 2444 a->cend_bs = oldmat->cend_bs; 2445 2446 /* hash table stuff */ 2447 a->ht = 0; 2448 a->hd = 0; 2449 a->ht_size = 0; 2450 a->ht_flag = oldmat->ht_flag; 2451 a->ht_fact = oldmat->ht_fact; 2452 a->ht_total_ct = 0; 2453 a->ht_insert_ct = 0; 2454 2455 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2456 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2457 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2458 if (oldmat->colmap) { 2459 #if defined (PETSC_USE_CTABLE) 2460 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2461 #else 2462 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2463 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2464 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2465 #endif 2466 } else a->colmap = 0; 2467 2468 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2469 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2470 PetscLogObjectMemory(mat,len*sizeof(int)); 2471 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2472 } else a->garray = 0; 2473 2474 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2475 PetscLogObjectParent(mat,a->lvec); 2476 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2477 PetscLogObjectParent(mat,a->Mvctx); 2478 2479 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2480 PetscLogObjectParent(mat,a->A); 2481 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2482 PetscLogObjectParent(mat,a->B); 2483 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2484 *newmat = mat; 2485 2486 PetscFunctionReturn(0); 2487 } 2488 2489 #include "petscsys.h" 2490 2491 #undef __FUNCT__ 2492 #define __FUNCT__ "MatLoad_MPIBAIJ" 2493 int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2494 { 2495 Mat A; 2496 int i,nz,ierr,j,rstart,rend,fd; 2497 PetscScalar *vals,*buf; 2498 MPI_Comm comm = ((PetscObject)viewer)->comm; 2499 MPI_Status status; 2500 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2501 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2502 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2503 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2504 int dcount,kmax,k,nzcount,tmp; 2505 2506 PetscFunctionBegin; 2507 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2508 2509 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2510 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2511 if (!rank) { 2512 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2513 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2514 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2515 if (header[3] < 0) { 2516 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2517 } 2518 } 2519 2520 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2521 M = header[1]; N = header[2]; 2522 2523 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2524 2525 /* 2526 This code adds extra rows to make sure the number of rows is 2527 divisible by the blocksize 2528 */ 2529 Mbs = M/bs; 2530 extra_rows = bs - M + bs*(Mbs); 2531 if (extra_rows == bs) extra_rows = 0; 2532 else Mbs++; 2533 if (extra_rows &&!rank) { 2534 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2535 } 2536 2537 /* determine ownership of all rows */ 2538 mbs = Mbs/size + ((Mbs % size) > rank); 2539 m = mbs*bs; 2540 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2541 browners = rowners + size + 1; 2542 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2543 rowners[0] = 0; 2544 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2545 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2546 rstart = rowners[rank]; 2547 rend = rowners[rank+1]; 2548 2549 /* distribute row lengths to all processors */ 2550 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2551 if (!rank) { 2552 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2553 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2554 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2555 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2556 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2557 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2558 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2559 } else { 2560 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2561 } 2562 2563 if (!rank) { 2564 /* calculate the number of nonzeros on each processor */ 2565 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2566 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2567 for (i=0; i<size; i++) { 2568 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2569 procsnz[i] += rowlengths[j]; 2570 } 2571 } 2572 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2573 2574 /* determine max buffer needed and allocate it */ 2575 maxnz = 0; 2576 for (i=0; i<size; i++) { 2577 maxnz = PetscMax(maxnz,procsnz[i]); 2578 } 2579 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2580 2581 /* read in my part of the matrix column indices */ 2582 nz = procsnz[0]; 2583 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2584 mycols = ibuf; 2585 if (size == 1) nz -= extra_rows; 2586 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2587 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2588 2589 /* read in every ones (except the last) and ship off */ 2590 for (i=1; i<size-1; i++) { 2591 nz = procsnz[i]; 2592 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2593 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2594 } 2595 /* read in the stuff for the last proc */ 2596 if (size != 1) { 2597 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2598 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2599 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2600 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2601 } 2602 ierr = PetscFree(cols);CHKERRQ(ierr); 2603 } else { 2604 /* determine buffer space needed for message */ 2605 nz = 0; 2606 for (i=0; i<m; i++) { 2607 nz += locrowlens[i]; 2608 } 2609 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2610 mycols = ibuf; 2611 /* receive message of column indices*/ 2612 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2613 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2614 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2615 } 2616 2617 /* loop over local rows, determining number of off diagonal entries */ 2618 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2619 odlens = dlens + (rend-rstart); 2620 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2621 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2622 masked1 = mask + Mbs; 2623 masked2 = masked1 + Mbs; 2624 rowcount = 0; nzcount = 0; 2625 for (i=0; i<mbs; i++) { 2626 dcount = 0; 2627 odcount = 0; 2628 for (j=0; j<bs; j++) { 2629 kmax = locrowlens[rowcount]; 2630 for (k=0; k<kmax; k++) { 2631 tmp = mycols[nzcount++]/bs; 2632 if (!mask[tmp]) { 2633 mask[tmp] = 1; 2634 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2635 else masked1[dcount++] = tmp; 2636 } 2637 } 2638 rowcount++; 2639 } 2640 2641 dlens[i] = dcount; 2642 odlens[i] = odcount; 2643 2644 /* zero out the mask elements we set */ 2645 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2646 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2647 } 2648 2649 /* create our matrix */ 2650 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2651 ierr = MatSetType(A,type);CHKERRQ(ierr) 2652 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2653 2654 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2655 MatSetOption(A,MAT_COLUMNS_SORTED); 2656 2657 if (!rank) { 2658 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2659 /* read in my part of the matrix numerical values */ 2660 nz = procsnz[0]; 2661 vals = buf; 2662 mycols = ibuf; 2663 if (size == 1) nz -= extra_rows; 2664 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2665 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2666 2667 /* insert into matrix */ 2668 jj = rstart*bs; 2669 for (i=0; i<m; i++) { 2670 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2671 mycols += locrowlens[i]; 2672 vals += locrowlens[i]; 2673 jj++; 2674 } 2675 /* read in other processors (except the last one) and ship out */ 2676 for (i=1; i<size-1; i++) { 2677 nz = procsnz[i]; 2678 vals = buf; 2679 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2680 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2681 } 2682 /* the last proc */ 2683 if (size != 1){ 2684 nz = procsnz[i] - extra_rows; 2685 vals = buf; 2686 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2687 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2688 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2689 } 2690 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2691 } else { 2692 /* receive numeric values */ 2693 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2694 2695 /* receive message of values*/ 2696 vals = buf; 2697 mycols = ibuf; 2698 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2699 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2700 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2701 2702 /* insert into matrix */ 2703 jj = rstart*bs; 2704 for (i=0; i<m; i++) { 2705 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2706 mycols += locrowlens[i]; 2707 vals += locrowlens[i]; 2708 jj++; 2709 } 2710 } 2711 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2712 ierr = PetscFree(buf);CHKERRQ(ierr); 2713 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2714 ierr = PetscFree(rowners);CHKERRQ(ierr); 2715 ierr = PetscFree(dlens);CHKERRQ(ierr); 2716 ierr = PetscFree(mask);CHKERRQ(ierr); 2717 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2718 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2719 2720 *newmat = A; 2721 PetscFunctionReturn(0); 2722 } 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2726 /*@ 2727 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2728 2729 Input Parameters: 2730 . mat - the matrix 2731 . fact - factor 2732 2733 Collective on Mat 2734 2735 Level: advanced 2736 2737 Notes: 2738 This can also be set by the command line option: -mat_use_hash_table fact 2739 2740 .keywords: matrix, hashtable, factor, HT 2741 2742 .seealso: MatSetOption() 2743 @*/ 2744 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2745 { 2746 int ierr,(*f)(Mat,PetscReal); 2747 2748 PetscFunctionBegin; 2749 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2750 if (f) { 2751 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2752 } 2753 PetscFunctionReturn(0); 2754 } 2755 2756 #undef __FUNCT__ 2757 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2758 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2759 { 2760 Mat_MPIBAIJ *baij; 2761 2762 PetscFunctionBegin; 2763 baij = (Mat_MPIBAIJ*)mat->data; 2764 baij->ht_fact = fact; 2765 PetscFunctionReturn(0); 2766 } 2767 2768 #undef __FUNCT__ 2769 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2770 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2771 { 2772 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2773 PetscFunctionBegin; 2774 *Ad = a->A; 2775 *Ao = a->B; 2776 *colmap = a->garray; 2777 PetscFunctionReturn(0); 2778 } 2779