1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN int DisAssemble_MPIBAIJ(Mat); 7 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int); 8 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []); 10 EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode); 11 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 12 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 13 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 14 EXTERN int MatPrintHelp_SeqBAIJ(Mat); 15 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 16 17 /* UGLY, ugly, ugly 18 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 19 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 20 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 21 converts the entries into single precision and then calls ..._MatScalar() to put them 22 into the single precision data structures. 23 */ 24 #if defined(PETSC_USE_MAT_SINGLE) 25 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 26 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 27 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 28 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 29 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 30 #else 31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 32 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 34 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 36 #endif 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 40 int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 41 { 42 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 43 int ierr,i; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 52 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 53 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 55 56 for (i=0; i<A->m; i++){ 57 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 58 } 59 60 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 61 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 63 64 PetscFunctionReturn(0); 65 } 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70 int MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 int ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ 
"MatRetrieveValues_MPIBAIJ" 85 int MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 int ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 int CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 int nbs = B->nbs,i,bs=B->bs,ierr; 110 111 PetscFunctionBegin; 112 #if defined (PETSC_USE_CTABLE) 113 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 114 for (i=0; i<nbs; i++){ 115 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 116 } 117 #else 118 ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 119 PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 120 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 121 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 122 #endif 123 PetscFunctionReturn(0); 124 } 125 126 #define CHUNKSIZE 10 127 128 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 129 { \ 130 \ 131 brow = row/bs; \ 132 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 133 rmax = aimax[brow]; nrow = ailen[brow]; \ 134 bcol = col/bs; \ 135 ridx = row % bs; cidx = col % bs; \ 136 low = 0; high = nrow; \ 137 while (high-low > 3) { \ 138 t = (low+high)/2; \ 139 if (rp[t] > bcol) high = t; \ 140 else low = t; \ 141 } \ 142 for (_i=low; _i<high; _i++) { \ 143 if (rp[_i] > bcol) break; \ 144 if (rp[_i] == bcol) { \ 145 bap = ap + bs2*_i + bs*cidx + ridx; \ 146 if (addv == ADD_VALUES) *bap += value; \ 147 else *bap = value; \ 148 goto a_noinsert; \ 149 } \ 150 } \ 151 if (a->nonew == 1) goto a_noinsert; \ 152 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 153 if (nrow >= rmax) { \ 154 /* there is no extra room in row, therefore enlarge */ \ 155 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 156 MatScalar *new_a; \ 157 \ 158 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 159 \ 160 /* malloc new storage space */ \ 161 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 162 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 163 new_j = (int*)(new_a + bs2*new_nz); \ 164 new_i = new_j + new_nz; \ 165 \ 166 /* copy over old data into new slots */ \ 167 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 168 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 169 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 170 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 171 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 172 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 173 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 175 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 
176 /* free up old matrix storage */ \ 177 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 178 if (!a->singlemalloc) { \ 179 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 180 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 181 } \ 182 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 183 a->singlemalloc = PETSC_TRUE; \ 184 \ 185 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 186 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 187 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 188 a->maxnz += bs2*CHUNKSIZE; \ 189 a->reallocs++; \ 190 a->nz++; \ 191 } \ 192 N = nrow++ - 1; \ 193 /* shift up all the later entries in this row */ \ 194 for (ii=N; ii>=_i; ii--) { \ 195 rp[ii+1] = rp[ii]; \ 196 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 197 } \ 198 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 199 rp[_i] = bcol; \ 200 ap[bs2*_i + bs*cidx + ridx] = value; \ 201 a_noinsert:; \ 202 ailen[brow] = nrow; \ 203 } 204 205 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 206 { \ 207 brow = row/bs; \ 208 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 209 rmax = bimax[brow]; nrow = bilen[brow]; \ 210 bcol = col/bs; \ 211 ridx = row % bs; cidx = col % bs; \ 212 low = 0; high = nrow; \ 213 while (high-low > 3) { \ 214 t = (low+high)/2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i=low; _i<high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2*_i + bs*cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto b_noinsert; \ 225 } \ 226 } \ 227 if (b->nonew == 1) goto b_noinsert; \ 228 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 229 if (nrow >= rmax) { \ 230 /* there is no extra room in row, therefore enlarge */ \ 231 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 232 MatScalar *new_a; \ 233 \ 234 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 235 \ 236 /* malloc new storage space */ \ 237 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 238 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 239 new_j = (int*)(new_a + bs2*new_nz); \ 240 new_i = new_j + new_nz; \ 241 \ 242 /* copy over old data into new slots */ \ 243 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 244 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 245 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 246 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 247 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 248 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 249 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 251 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 252 /* free up old matrix storage */ \ 253 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 254 if (!b->singlemalloc) { \ 255 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 256 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 257 } \ 258 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 259 b->singlemalloc = PETSC_TRUE; \ 260 \ 261 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 262 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 263 
PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 264 b->maxnz += bs2*CHUNKSIZE; \ 265 b->reallocs++; \ 266 b->nz++; \ 267 } \ 268 N = nrow++ - 1; \ 269 /* shift up all the later entries in this row */ \ 270 for (ii=N; ii>=_i; ii--) { \ 271 rp[ii+1] = rp[ii]; \ 272 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 273 } \ 274 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 275 rp[_i] = bcol; \ 276 ap[bs2*_i + bs*cidx + ridx] = value; \ 277 b_noinsert:; \ 278 bilen[brow] = nrow; \ 279 } 280 281 #if defined(PETSC_USE_MAT_SINGLE) 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatSetValues_MPIBAIJ" 284 int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 285 { 286 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 287 int ierr,i,N = m*n; 288 MatScalar *vsingle; 289 290 PetscFunctionBegin; 291 if (N > b->setvalueslen) { 292 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 293 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 294 b->setvalueslen = N; 295 } 296 vsingle = b->setvaluescopy; 297 298 for (i=0; i<N; i++) { 299 vsingle[i] = v[i]; 300 } 301 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 307 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 308 { 309 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 310 int ierr,i,N = m*n*b->bs2; 311 MatScalar *vsingle; 312 313 PetscFunctionBegin; 314 if (N > b->setvalueslen) { 315 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 316 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 317 b->setvalueslen = N; 318 } 319 vsingle = b->setvaluescopy; 320 for (i=0; i<N; i++) { 321 vsingle[i] = v[i]; 322 } 323 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 329 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 330 { 331 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 332 int ierr,i,N = m*n; 333 MatScalar *vsingle; 334 335 PetscFunctionBegin; 336 if (N > b->setvalueslen) { 337 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 338 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 339 b->setvalueslen = N; 340 } 341 vsingle = b->setvaluescopy; 342 for (i=0; i<N; i++) { 343 vsingle[i] = v[i]; 344 } 345 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 346 PetscFunctionReturn(0); 347 } 348 349 #undef __FUNCT__ 350 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 351 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 352 { 353 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 354 int ierr,i,N = m*n*b->bs2; 355 MatScalar *vsingle; 356 357 PetscFunctionBegin; 358 if (N > b->setvalueslen) { 359 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 360 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 361 b->setvalueslen = N; 362 } 363 vsingle = b->setvaluescopy; 364 for (i=0; i<N; i++) { 365 vsingle[i] = v[i]; 366 } 367 ierr = 
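/* Note: the four PETSC_USE_MAT_SINGLE wrappers in this block share one pattern --
   grow the persistent MatScalar buffer b->setvaluescopy on demand, narrow the
   PetscScalar (double precision) input into it element by element, and forward
   to the corresponding ..._MatScalar() routine. The buffer is reused across
   calls and freed in MatDestroy_MPIBAIJ(). */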
MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 #endif 371 372 #undef __FUNCT__ 373 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 374 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 375 { 376 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 377 MatScalar value; 378 PetscTruth roworiented = baij->roworiented; 379 int ierr,i,j,row,col; 380 int rstart_orig=baij->rstart_bs; 381 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 382 int cend_orig=baij->cend_bs,bs=baij->bs; 383 384 /* Some Variables required in the macro */ 385 Mat A = baij->A; 386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 387 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 388 MatScalar *aa=a->a; 389 390 Mat B = baij->B; 391 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 392 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 393 MatScalar *ba=b->a; 394 395 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 396 int low,high,t,ridx,cidx,bs2=a->bs2; 397 MatScalar *ap,*bap; 398 399 PetscFunctionBegin; 400 for (i=0; i<m; i++) { 401 if (im[i] < 0) continue; 402 #if defined(PETSC_USE_BOPT_g) 403 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 404 #endif 405 if (im[i] >= rstart_orig && im[i] < rend_orig) { 406 row = im[i] - rstart_orig; 407 for (j=0; j<n; j++) { 408 if (in[j] >= cstart_orig && in[j] < cend_orig){ 409 col = in[j] - cstart_orig; 410 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 411 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 412 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 413 } else if (in[j] < 0) continue; 414 #if defined(PETSC_USE_BOPT_g) 415 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 416 #endif 417 else { 418 if (mat->was_assembled) { 419 if (!baij->colmap) { 420 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 421 } 422 #if defined (PETSC_USE_CTABLE) 423 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 424 col = col - 1; 425 #else 426 col = baij->colmap[in[j]/bs] - 1; 427 #endif 428 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 429 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 430 col = in[j]; 431 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 432 B = baij->B; 433 b = (Mat_SeqBAIJ*)(B)->data; 434 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 435 ba=b->a; 436 } else col += in[j]%bs; 437 } else col = in[j]; 438 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 439 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 440 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 441 } 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 458 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 459 { 460 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 461 const MatScalar *value; 462 MatScalar *barray=baij->barray; 463 PetscTruth roworiented = baij->roworiented; 464 int
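/* A minimal usage sketch for the blocked interface (illustrative, with bs = 2;
   not taken from the original source): the caller addresses bs x bs blocks by
   block row/column index and passes the dense block:
       PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
       int brow = 0,bcol = 1;
       ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
   Blocks whose row is owned by another process are stashed below and routed
   during MatAssemblyBegin()/MatAssemblyEnd(). */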
ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 465 int rend=baij->rend,cstart=baij->cstart,stepval; 466 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 467 468 PetscFunctionBegin; 469 if(!barray) { 470 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 471 baij->barray = barray; 472 } 473 474 if (roworiented) { 475 stepval = (n-1)*bs; 476 } else { 477 stepval = (m-1)*bs; 478 } 479 for (i=0; i<m; i++) { 480 if (im[i] < 0) continue; 481 #if defined(PETSC_USE_BOPT_g) 482 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1); 483 #endif 484 if (im[i] >= rstart && im[i] < rend) { 485 row = im[i] - rstart; 486 for (j=0; j<n; j++) { 487 /* If NumCol = 1 then a copy is not required */ 488 if ((roworiented) && (n == 1)) { 489 barray = (MatScalar*)v + i*bs2; 490 } else if((!roworiented) && (m == 1)) { 491 barray = (MatScalar*)v + j*bs2; 492 } else { /* Here a copy is required */ 493 if (roworiented) { 494 value = v + i*(stepval+bs)*bs + j*bs; 495 } else { 496 value = v + j*(stepval+bs)*bs + i*bs; 497 } 498 for (ii=0; ii<bs; ii++,value+=stepval) { 499 for (jj=0; jj<bs; jj++) { 500 *barray++ = *value++; 501 } 502 } 503 barray -=bs2; 504 } 505 506 if (in[j] >= cstart && in[j] < cend){ 507 col = in[j] - cstart; 508 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509 } 510 else if (in[j] < 0) continue; 511 #if defined(PETSC_USE_BOPT_g) 512 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);} 513 #endif 514 else { 515 if (mat->was_assembled) { 516 if (!baij->colmap) { 517 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 518 } 519 520 #if defined(PETSC_USE_BOPT_g) 521 #if defined (PETSC_USE_CTABLE) 522 { int data; 523 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 524 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 525 } 526 #else 527 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 528 #endif 529 #endif 530 #if defined (PETSC_USE_CTABLE) 531 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 532 col = (col - 1)/bs; 533 #else 534 col = (baij->colmap[in[j]] - 1)/bs; 535 #endif 536 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 537 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 538 col = in[j]; 539 } 540 } 541 else col = in[j]; 542 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 543 } 544 } 545 } else { 546 if (!baij->donotstash) { 547 if (roworiented) { 548 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 PetscFunctionReturn(0); 556 } 557 558 #define HASH_KEY 0.6180339887 559 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 560 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 564 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 565 { 566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567 PetscTruth roworiented = baij->roworiented; 568 int ierr,i,j,row,col; 569 int rstart_orig=baij->rstart_bs; 570 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 571 int 
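/* The HASH() macro above is multiplicative hashing with the golden-ratio
   constant. A worked example (illustrative numbers): with size = 10 and
   key = 7, tmp = 7*0.6180339887 = 4.3262..., and the fractional part
   0.3262... scaled by the table size gives slot (int)(10*0.3262...) = 3.
   Keys that collide fall through to the linear probe loops further down. */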
h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 572 PetscReal tmp; 573 MatScalar **HD = baij->hd,value; 574 #if defined(PETSC_USE_BOPT_g) 575 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 576 #endif 577 578 PetscFunctionBegin; 579 580 for (i=0; i<m; i++) { 581 #if defined(PETSC_USE_BOPT_g) 582 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 583 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 584 #endif 585 row = im[i]; 586 if (row >= rstart_orig && row < rend_orig) { 587 for (j=0; j<n; j++) { 588 col = in[j]; 589 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 590 /* Look up in the hash table */ 591 key = (row/bs)*Nbs+(col/bs)+1; 592 h1 = HASH(size,key,tmp); 593 594 595 idx = h1; 596 #if defined(PETSC_USE_BOPT_g) 597 insert_ct++; 598 total_ct++; 599 if (HT[idx] != key) { 600 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 601 if (idx == size) { 602 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 603 if (idx == h1) { 604 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 605 } 606 } 607 } 608 #else 609 if (HT[idx] != key) { 610 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 611 if (idx == size) { 612 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 613 if (idx == h1) { 614 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 615 } 616 } 617 } 618 #endif 619 /* A hash table entry was found, so insert the values at the correct address */ 620 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 621 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 622 } 623 } else { 624 if (!baij->donotstash) { 625 if (roworiented) { 626 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 627 } else { 628 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 629 } 630 } 631 } 632 } 633 #if defined(PETSC_USE_BOPT_g) 634 baij->ht_total_ct = total_ct; 635 baij->ht_insert_ct = insert_ct; 636 #endif 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 642 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv) 643 { 644 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 645 PetscTruth roworiented = baij->roworiented; 646 int ierr,i,j,ii,jj,row,col; 647 int rstart=baij->rstart ; 648 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 649 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 650 PetscReal tmp; 651 MatScalar **HD = baij->hd,*baij_a; 652 const MatScalar *v_t,*value; 653 #if defined(PETSC_USE_BOPT_g) 654 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 655 #endif 656 657 PetscFunctionBegin; 658 659 if (roworiented) { 660 stepval = (n-1)*bs; 661 } else { 662 stepval = (m-1)*bs; 663 } 664 for (i=0; i<m; i++) { 665 #if defined(PETSC_USE_BOPT_g) 666 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]); 667 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1); 668 #endif 669 row = im[i]; 670 v_t = v + i*bs2; 671 if (row >= rstart && row < rend) { 672 for (j=0; j<n; j++) { 673 col = in[j]; 674 675 /* Look up in the hash table */ 676 key = row*Nbs+col+1; 677 h1 = HASH(size,key,tmp); 678 679 idx = h1; 680 #if defined(PETSC_USE_BOPT_g) 681 total_ct++; 682 insert_ct++; 683 if (HT[idx] != key) { 684 for
(idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 685 if (idx == size) { 686 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 687 if (idx == h1) { 688 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 689 } 690 } 691 } 692 #else 693 if (HT[idx] != key) { 694 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 695 if (idx == size) { 696 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 697 if (idx == h1) { 698 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col); 699 } 700 } 701 } 702 #endif 703 baij_a = HD[idx]; 704 if (roworiented) { 705 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 706 /* value = v + (i*(stepval+bs)+j)*bs; */ 707 value = v_t; 708 v_t += bs; 709 if (addv == ADD_VALUES) { 710 for (ii=0; ii<bs; ii++,value+=stepval) { 711 for (jj=ii; jj<bs2; jj+=bs) { 712 baij_a[jj] += *value++; 713 } 714 } 715 } else { 716 for (ii=0; ii<bs; ii++,value+=stepval) { 717 for (jj=ii; jj<bs2; jj+=bs) { 718 baij_a[jj] = *value++; 719 } 720 } 721 } 722 } else { 723 value = v + j*(stepval+bs)*bs + i*bs; 724 if (addv == ADD_VALUES) { 725 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 726 for (jj=0; jj<bs; jj++) { 727 baij_a[jj] += *value++; 728 } 729 } 730 } else { 731 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 732 for (jj=0; jj<bs; jj++) { 733 baij_a[jj] = *value++; 734 } 735 } 736 } 737 } 738 } 739 } else { 740 if (!baij->donotstash) { 741 if (roworiented) { 742 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 743 } else { 744 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 745 } 746 } 747 } 748 } 749 #if defined(PETSC_USE_BOPT_g) 750 baij->ht_total_ct = total_ct; 751 baij->ht_insert_ct = insert_ct; 752 #endif 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatGetValues_MPIBAIJ" 758 int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 759 { 760 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 761 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 762 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 763 764 PetscFunctionBegin; 765 for (i=0; i<m; i++) { 766 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 767 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 768 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 769 row = idxm[i] - bsrstart; 770 for (j=0; j<n; j++) { 771 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 772 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 773 if (idxn[j] >= bscstart && idxn[j] < bscend){ 774 col = idxn[j] - bscstart; 775 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776 } else { 777 if (!baij->colmap) { 778 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 779 } 780 #if defined (PETSC_USE_CTABLE) 781 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 782 data --; 783 #else 784 data = baij->colmap[idxn[j]/bs]-1; 785 #endif 786 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 787 else { 788 col = data + idxn[j]%bs; 789 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 790 } 791 } 792 } 793 } else { 794 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 } 797 PetscFunctionReturn(0); 798 
} 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatNorm_MPIBAIJ" 802 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 803 { 804 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 805 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 806 int ierr,i,bs2=baij->bs2; 807 PetscReal sum = 0.0; 808 MatScalar *v; 809 810 PetscFunctionBegin; 811 if (baij->size == 1) { 812 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 813 } else { 814 if (type == NORM_FROBENIUS) { 815 v = amat->a; 816 for (i=0; i<amat->nz*bs2; i++) { 817 #if defined(PETSC_USE_COMPLEX) 818 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 819 #else 820 sum += (*v)*(*v); v++; 821 #endif 822 } 823 v = bmat->a; 824 for (i=0; i<bmat->nz*bs2; i++) { 825 #if defined(PETSC_USE_COMPLEX) 826 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 827 #else 828 sum += (*v)*(*v); v++; 829 #endif 830 } 831 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 832 *nrm = sqrt(*nrm); 833 } else { 834 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 835 } 836 } 837 PetscFunctionReturn(0); 838 } 839 840 841 /* 842 Creates the hash table, and sets the table entries. 843 This table is created only once. 844 If new entries need to be added to the matrix 845 then the hash table has to be destroyed and 846 recreated. 847 */ 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851 { 852 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853 Mat A = baij->A,B=baij->B; 854 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 857 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 858 int *HT,key; 859 MatScalar **HD; 860 PetscReal tmp; 861 #if defined(PETSC_USE_BOPT_g) 862 int ct=0,max=0; 863 #endif 864 865 PetscFunctionBegin; 866 baij->ht_size=(int)(factor*nz); 867 size = baij->ht_size; 868 869 if (baij->ht) { 870 PetscFunctionReturn(0); 871 } 872 873 /* Allocate Memory for Hash Table */ 874 ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 875 baij->ht = (int*)(baij->hd + size); 876 HD = baij->hd; 877 HT = baij->ht; 878 879 880 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr); 881 882 883 /* Loop Over A */ 884 for (i=0; i<a->mbs; i++) { 885 for (j=ai[i]; j<ai[i+1]; j++) { 886 row = i+rstart; 887 col = aj[j]+cstart; 888 889 key = row*Nbs + col + 1; 890 h1 = HASH(size,key,tmp); 891 for (k=0; k<size; k++){ 892 if (HT[(h1+k)%size] == 0) { 893 HT[(h1+k)%size] = key; 894 HD[(h1+k)%size] = a->a + j*bs2; 895 break; 896 #if defined(PETSC_USE_BOPT_g) 897 } else { 898 ct++; 899 #endif 900 } 901 } 902 #if defined(PETSC_USE_BOPT_g) 903 if (k> max) max = k; 904 #endif 905 } 906 } 907 /* Loop Over B */ 908 for (i=0; i<b->mbs; i++) { 909 for (j=bi[i]; j<bi[i+1]; j++) { 910 row = i+rstart; 911 col = garray[bj[j]]; 912 key = row*Nbs + col + 1; 913 h1 = HASH(size,key,tmp); 914 for (k=0; k<size; k++){ 915 if (HT[(h1+k)%size] == 0) { 916 HT[(h1+k)%size] = key; 917 HD[(h1+k)%size] = b->a + j*bs2; 918 break; 919 #if defined(PETSC_USE_BOPT_g) 920 } else { 921 ct++; 922 #endif 923 } 924 } 925 #if defined(PETSC_USE_BOPT_g) 926 if (k> max) max = k; 927 #endif 928 } 929 } 930 931 /* Print Summary */ 932 #if defined(PETSC_USE_BOPT_g) 933 for (i=0,j=0; i<size; i++) { 934 if (HT[i]) {j++;} 935 } 936
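/* With ht_size = factor*nz the table stores nz keys in factor*nz slots, i.e.
   an occupancy of 1/factor; keeping some slots empty is what keeps the
   average linear-probe length reported below small. */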
PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 937 #endif 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNCT__ 942 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 943 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 944 { 945 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 946 int ierr,nstash,reallocs; 947 InsertMode addv; 948 949 PetscFunctionBegin; 950 if (baij->donotstash) { 951 PetscFunctionReturn(0); 952 } 953 954 /* make sure all processors are either in INSERTMODE or ADDMODE */ 955 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 956 if (addv == (ADD_VALUES|INSERT_VALUES)) { 957 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 958 } 959 mat->insertmode = addv; /* in case this processor had no cache */ 960 961 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 962 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 963 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 964 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 965 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 966 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 967 PetscFunctionReturn(0); 968 } 969 970 #undef __FUNCT__ 971 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 972 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 973 { 974 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 975 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 976 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 977 int *row,*col,other_disassembled; 978 PetscTruth r1,r2,r3; 979 MatScalar *val; 980 InsertMode addv = mat->insertmode; 981 982 PetscFunctionBegin; 983 if (!baij->donotstash) { 984 while (1) { 985 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 986 if (!flg) break; 987 988 for (i=0; i<n;) { 989 /* Now identify the consecutive vals belonging to the same row */ 990 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 991 if (j < n) ncols = j-i; 992 else ncols = n-i; 993 /* Now assemble all these values with a single function call */ 994 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 995 i = j; 996 } 997 } 998 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 999 /* Now process the block-stash. 
Since the values are stashed column-oriented, 1000 temporarily clear the roworiented flags while the MatSetValues() calls are made, 1001 then restore the original flags */ 1002 r1 = baij->roworiented; 1003 r2 = a->roworiented; 1004 r3 = b->roworiented; 1005 baij->roworiented = PETSC_FALSE; 1006 a->roworiented = PETSC_FALSE; 1007 b->roworiented = PETSC_FALSE; 1008 while (1) { 1009 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1010 if (!flg) break; 1011 1012 for (i=0; i<n;) { 1013 /* Now identify the consecutive vals belonging to the same row */ 1014 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1015 if (j < n) ncols = j-i; 1016 else ncols = n-i; 1017 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1018 i = j; 1019 } 1020 } 1021 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1022 baij->roworiented = r1; 1023 a->roworiented = r2; 1024 b->roworiented = r3; 1025 } 1026 1027 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1028 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1029 1030 /* determine if any processor has disassembled, if so we must 1031 also disassemble ourselves, in order that we may reassemble. */ 1032 /* 1033 if nonzero structure of submatrix B cannot change then we know that 1034 no processor disassembled thus we can skip this stuff 1035 */ 1036 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1037 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1038 if (mat->was_assembled && !other_disassembled) { 1039 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1040 } 1041 } 1042 1043 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1044 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1045 } 1046 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1047 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1048 1049 #if defined(PETSC_USE_BOPT_g) 1050 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1051 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1052 baij->ht_total_ct = 0; 1053 baij->ht_insert_ct = 0; 1054 } 1055 #endif 1056 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1057 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1058 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1059 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1060 } 1061 1062 if (baij->rowvalues) { 1063 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1064 baij->rowvalues = 0; 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1071 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1072 { 1073 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1074 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 1075 PetscTruth isascii,isdraw; 1076 PetscViewer sviewer; 1077 PetscViewerFormat format; 1078 1079 PetscFunctionBegin; 1080 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1081 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1082 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1083 if (isascii) { 1084 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1085 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1086 MatInfo info; 1087 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1088
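/* The synchronized prints below emit one summary line per rank, for example
   (illustrative output only):
       [0] Local rows 200 nz 5000 nz alloced 6000 bs 2 mem 123456
   followed by the on- and off-diagonal nonzero counts and the scatter context. */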
ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1089 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1090 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1091 baij->bs,(int)info.memory);CHKERRQ(ierr); 1092 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1093 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1094 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1095 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1096 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1097 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1100 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1103 PetscFunctionReturn(0); 1104 } 1105 } 1106 1107 if (isdraw) { 1108 PetscDraw draw; 1109 PetscTruth isnull; 1110 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1111 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1112 } 1113 1114 if (size == 1) { 1115 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1116 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1117 } else { 1118 /* assemble the entire matrix onto first processor. */ 1119 Mat A; 1120 Mat_SeqBAIJ *Aloc; 1121 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1122 MatScalar *a; 1123 1124 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1125 /* Perhaps this should be the type of mat? 
*/ 1126 if (!rank) { 1127 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1128 } else { 1129 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1130 } 1131 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1132 ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1133 PetscLogObjectParent(mat,A); 1134 1135 /* copy over the A part */ 1136 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1137 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1138 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1139 1140 for (i=0; i<mbs; i++) { 1141 rvals[0] = bs*(baij->rstart + i); 1142 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1143 for (j=ai[i]; j<ai[i+1]; j++) { 1144 col = (baij->cstart+aj[j])*bs; 1145 for (k=0; k<bs; k++) { 1146 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1147 col++; a += bs; 1148 } 1149 } 1150 } 1151 /* copy over the B part */ 1152 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1153 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1154 for (i=0; i<mbs; i++) { 1155 rvals[0] = bs*(baij->rstart + i); 1156 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1157 for (j=ai[i]; j<ai[i+1]; j++) { 1158 col = baij->garray[aj[j]]*bs; 1159 for (k=0; k<bs; k++) { 1160 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1161 col++; a += bs; 1162 } 1163 } 1164 } 1165 ierr = PetscFree(rvals);CHKERRQ(ierr); 1166 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1167 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1168 /* 1169 Everyone has to call to draw the matrix since the graphics waits are 1170 synchronized across all processors that share the PetscDraw object 1171 */ 1172 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1173 if (!rank) { 1174 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1175 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1176 } 1177 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1178 ierr = MatDestroy(A);CHKERRQ(ierr); 1179 } 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatView_MPIBAIJ" 1185 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1186 { 1187 int ierr; 1188 PetscTruth isascii,isdraw,issocket,isbinary; 1189 1190 PetscFunctionBegin; 1191 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1192 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1193 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1194 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1195 if (isascii || isdraw || issocket || isbinary) { 1196 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1197 } else { 1198 SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1199 } 1200 PetscFunctionReturn(0); 1201 } 1202 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1205 int MatDestroy_MPIBAIJ(Mat mat) 1206 { 1207 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1208 int ierr; 1209 1210 PetscFunctionBegin; 1211 #if defined(PETSC_USE_LOG) 1212 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 1213 #endif 1214 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1215 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1216 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1217 ierr = 
MatDestroy(baij->A);CHKERRQ(ierr); 1218 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1219 #if defined (PETSC_USE_CTABLE) 1220 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1221 #else 1222 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1223 #endif 1224 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1225 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1226 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1227 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1228 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1229 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1230 #if defined(PETSC_USE_MAT_SINGLE) 1231 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1232 #endif 1233 ierr = PetscFree(baij);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatMult_MPIBAIJ" 1239 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1240 { 1241 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1242 int ierr,nt; 1243 1244 PetscFunctionBegin; 1245 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1246 if (nt != A->n) { 1247 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1248 } 1249 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1250 if (nt != A->m) { 1251 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy"); 1252 } 1253 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1254 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1255 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1256 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1257 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1263 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1264 { 1265 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1266 int ierr; 1267 1268 PetscFunctionBegin; 1269 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1270 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1271 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1272 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1278 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1279 { 1280 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1281 int ierr; 1282 1283 PetscFunctionBegin; 1284 /* do nondiagonal part */ 1285 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1286 /* send it on its way */ 1287 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1288 /* do local part */ 1289 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1290 /* receive remote parts: note this assumes the values are not actually */ 1291 /* inserted in yy until the next line, which is true for my implementation*/ 1292 /* but is perhaps not always true. 
*/ 1293 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1299 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1300 { 1301 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1302 int ierr; 1303 1304 PetscFunctionBegin; 1305 /* do nondiagonal part */ 1306 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1307 /* send it on its way */ 1308 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1309 /* do local part */ 1310 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1311 /* receive remote parts: note this assumes the values are not actually */ 1312 /* inserted in yy until the next line, which is true for my implementation*/ 1313 /* but is not perhaps always true. */ 1314 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /* 1319 This only works correctly for square matrices where the subblock A->A is the 1320 diagonal block 1321 */ 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1324 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1325 { 1326 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1327 int ierr; 1328 1329 PetscFunctionBegin; 1330 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1331 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatScale_MPIBAIJ" 1337 int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1338 { 1339 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1340 int ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1344 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1350 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1351 { 1352 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1353 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1354 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1355 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1356 int *cmap,*idx_p,cstart = mat->cstart; 1357 1358 PetscFunctionBegin; 1359 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1360 mat->getrowactive = PETSC_TRUE; 1361 1362 if (!mat->rowvalues && (idx || v)) { 1363 /* 1364 allocate enough space to hold information from the longest row. 
1365 */ 1366 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1367 int max = 1,mbs = mat->mbs,tmp; 1368 for (i=0; i<mbs; i++) { 1369 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1370 if (max < tmp) { max = tmp; } 1371 } 1372 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1373 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1374 } 1375 1376 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1377 lrow = row - brstart; 1378 1379 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1380 if (!v) {pvA = 0; pvB = 0;} 1381 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1382 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1383 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1384 nztot = nzA + nzB; 1385 1386 cmap = mat->garray; 1387 if (v || idx) { 1388 if (nztot) { 1389 /* Sort by increasing column numbers, assuming A and B already sorted */ 1390 int imark = -1; 1391 if (v) { 1392 *v = v_p = mat->rowvalues; 1393 for (i=0; i<nzB; i++) { 1394 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1395 else break; 1396 } 1397 imark = i; 1398 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1399 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1400 } 1401 if (idx) { 1402 *idx = idx_p = mat->rowindices; 1403 if (imark > -1) { 1404 for (i=0; i<imark; i++) { 1405 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1406 } 1407 } else { 1408 for (i=0; i<nzB; i++) { 1409 if (cmap[cworkB[i]/bs] < cstart) 1410 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1411 else break; 1412 } 1413 imark = i; 1414 } 1415 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1416 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1417 } 1418 } else { 1419 if (idx) *idx = 0; 1420 if (v) *v = 0; 1421 } 1422 } 1423 *nz = nztot; 1424 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1425 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1431 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1432 { 1433 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1434 1435 PetscFunctionBegin; 1436 if (baij->getrowactive == PETSC_FALSE) { 1437 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1438 } 1439 baij->getrowactive = PETSC_FALSE; 1440 PetscFunctionReturn(0); 1441 } 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1445 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1446 { 1447 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1448 1449 PetscFunctionBegin; 1450 *bs = baij->bs; 1451 PetscFunctionReturn(0); 1452 } 1453 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1456 int MatZeroEntries_MPIBAIJ(Mat A) 1457 { 1458 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1459 int ierr; 1460 1461 PetscFunctionBegin; 1462 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1463 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1469 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1470 { 1471 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1472 Mat A = a->A,B = a->B; 1473 int ierr; 1474 PetscReal isend[5],irecv[5]; 1475 1476 PetscFunctionBegin; 1477 info->block_size = (PetscReal)a->bs; 1478 ierr = 
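/* The local counts from the diagonal block A and the off-diagonal block B are
   accumulated into isend[0..4] = {nz_used, nz_allocated, nz_unneeded, memory,
   mallocs}; for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM that 5-vector is then combined
   across the communicator with a single MPI_Allreduce below. */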
MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1479 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1480 isend[3] = info->memory; isend[4] = info->mallocs; 1481 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1482 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1483 isend[3] += info->memory; isend[4] += info->mallocs; 1484 if (flag == MAT_LOCAL) { 1485 info->nz_used = isend[0]; 1486 info->nz_allocated = isend[1]; 1487 info->nz_unneeded = isend[2]; 1488 info->memory = isend[3]; 1489 info->mallocs = isend[4]; 1490 } else if (flag == MAT_GLOBAL_MAX) { 1491 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1492 info->nz_used = irecv[0]; 1493 info->nz_allocated = irecv[1]; 1494 info->nz_unneeded = irecv[2]; 1495 info->memory = irecv[3]; 1496 info->mallocs = irecv[4]; 1497 } else if (flag == MAT_GLOBAL_SUM) { 1498 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1499 info->nz_used = irecv[0]; 1500 info->nz_allocated = irecv[1]; 1501 info->nz_unneeded = irecv[2]; 1502 info->memory = irecv[3]; 1503 info->mallocs = irecv[4]; 1504 } else { 1505 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1506 } 1507 info->rows_global = (PetscReal)matin->M; 1508 info->columns_global = (PetscReal)matin->N; 1509 info->rows_local = (PetscReal)matin->m; 1510 info->columns_local = (PetscReal)matin->n; 1511 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1512 info->fill_ratio_needed = 0; 1513 info->factor_mallocs = 0; 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1519 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1520 { 1521 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1522 int ierr; 1523 1524 PetscFunctionBegin; 1525 switch (op) { 1526 case MAT_NO_NEW_NONZERO_LOCATIONS: 1527 case MAT_YES_NEW_NONZERO_LOCATIONS: 1528 case MAT_COLUMNS_UNSORTED: 1529 case MAT_COLUMNS_SORTED: 1530 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1531 case MAT_KEEP_ZEROED_ROWS: 1532 case MAT_NEW_NONZERO_LOCATION_ERR: 1533 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1534 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1535 break; 1536 case MAT_ROW_ORIENTED: 1537 a->roworiented = PETSC_TRUE; 1538 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1539 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1540 break; 1541 case MAT_ROWS_SORTED: 1542 case MAT_ROWS_UNSORTED: 1543 case MAT_YES_NEW_DIAGONALS: 1544 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1545 break; 1546 case MAT_COLUMN_ORIENTED: 1547 a->roworiented = PETSC_FALSE; 1548 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1549 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1550 break; 1551 case MAT_IGNORE_OFF_PROC_ENTRIES: 1552 a->donotstash = PETSC_TRUE; 1553 break; 1554 case MAT_NO_NEW_DIAGONALS: 1555 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1556 case MAT_USE_HASH_TABLE: 1557 a->ht_flag = PETSC_TRUE; 1558 break; 1559 case MAT_SYMMETRIC: 1560 case MAT_STRUCTURALLY_SYMMETRIC: 1561 case MAT_NOT_SYMMETRIC: 1562 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1563 case MAT_HERMITIAN: 1564 case MAT_NOT_HERMITIAN: 1565 case MAT_SYMMETRY_ETERNAL: 1566 case MAT_NOT_SYMMETRY_ETERNAL: 1567 break; 1568 default: 1569 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1570 } 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1576 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1577 { 1578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1579 Mat_SeqBAIJ *Aloc; 1580 Mat B; 1581 int
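/* The transpose is formed by resending every stored block: a block held
   locally at global position (r,c) is inserted into B at (c,r) below, and
   MatAssemblyBegin()/MatAssemblyEnd() route the blocks that land on other
   processes. */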
ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1582 int bs=baij->bs,mbs=baij->mbs; 1583 MatScalar *a; 1584 1585 PetscFunctionBegin; 1586 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1587 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1588 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1589 ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1590 1591 /* copy over the A part */ 1592 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1593 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1594 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1595 1596 for (i=0; i<mbs; i++) { 1597 rvals[0] = bs*(baij->rstart + i); 1598 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1599 for (j=ai[i]; j<ai[i+1]; j++) { 1600 col = (baij->cstart+aj[j])*bs; 1601 for (k=0; k<bs; k++) { 1602 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1603 col++; a += bs; 1604 } 1605 } 1606 } 1607 /* copy over the B part */ 1608 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1609 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1610 for (i=0; i<mbs; i++) { 1611 rvals[0] = bs*(baij->rstart + i); 1612 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1613 for (j=ai[i]; j<ai[i+1]; j++) { 1614 col = baij->garray[aj[j]]*bs; 1615 for (k=0; k<bs; k++) { 1616 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1617 col++; a += bs; 1618 } 1619 } 1620 } 1621 ierr = PetscFree(rvals);CHKERRQ(ierr); 1622 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1623 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1624 1625 if (matout) { 1626 *matout = B; 1627 } else { 1628 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1629 } 1630 PetscFunctionReturn(0); 1631 } 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1635 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1636 { 1637 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1638 Mat a = baij->A,b = baij->B; 1639 int ierr,s1,s2,s3; 1640 1641 PetscFunctionBegin; 1642 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1643 if (rr) { 1644 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1645 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1646 /* Overlap communication with computation. 
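The scatter of rr into baij->lvec is started here and completed only after the left scaling of B and the scaling of the diagonal block, so the off-process entries travel while local work proceeds.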
*/ 1647 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1648 } 1649 if (ll) { 1650 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1651 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1652 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1653 } 1654 /* scale the diagonal block */ 1655 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1656 1657 if (rr) { 1658 /* Do a scatter end and then right scale the off-diagonal block */ 1659 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1660 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1661 } 1662 1663 PetscFunctionReturn(0); 1664 } 1665 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1668 int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1669 { 1670 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1671 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1672 int *nprocs,j,idx,nsends,row; 1673 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1674 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1675 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1676 MPI_Comm comm = A->comm; 1677 MPI_Request *send_waits,*recv_waits; 1678 MPI_Status recv_status,*send_status; 1679 IS istmp; 1680 PetscTruth found; 1681 1682 PetscFunctionBegin; 1683 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1684 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1685 1686 /* first count number of contributors to each processor */ 1687 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1688 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1689 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1690 for (i=0; i<N; i++) { 1691 idx = rows[i]; 1692 found = PETSC_FALSE; 1693 for (j=0; j<size; j++) { 1694 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1695 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1696 } 1697 } 1698 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1699 } 1700 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1701 1702 /* inform other processors of number of messages and max length*/ 1703 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1704 1705 /* post receives: */ 1706 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1707 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1708 for (i=0; i<nrecvs; i++) { 1709 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1710 } 1711 1712 /* do sends: 1713 1) starts[i] gives the starting index in svalues for stuff going to 1714 the ith processor 1715 */ 1716 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1717 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1718 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1719 starts[0] = 0; 1720 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1721 for (i=0; i<N; i++) { 1722 svalues[starts[owner[i]]++] = rows[i]; 1723 } 1724 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1725 1726 starts[0] = 0; 1727 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1728 count = 0; 1729 for (i=0; i<size; i++) { 1730 if (nprocs[2*i+1]) { 1731 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1732 } 1733 } 1734 ierr = 
PetscFree(starts);CHKERRQ(ierr);
1735
1736   base = owners[rank]*bs;
1737
1738   /* wait on receives */
1739   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1740   source = lens + nrecvs;
1741   count  = nrecvs; slen = 0;
1742   while (count) {
1743     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1744     /* unpack receives into our local space */
1745     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1746     source[imdex] = recv_status.MPI_SOURCE;
1747     lens[imdex]   = n;
1748     slen += n;
1749     count--;
1750   }
1751   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1752
1753   /* move the data into the send scatter */
1754   ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1755   count = 0;
1756   for (i=0; i<nrecvs; i++) {
1757     values = rvalues + i*nmax;
1758     for (j=0; j<lens[i]; j++) {
1759       lrows[count++] = values[j] - base;
1760     }
1761   }
1762   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1763   ierr = PetscFree(lens);CHKERRQ(ierr);
1764   ierr = PetscFree(owner);CHKERRQ(ierr);
1765   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1766
1767   /* actually zap the local rows */
1768   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1769   PetscLogObjectParent(A,istmp);
1770
1771   /*
1772      Zero the required rows. If the "diagonal block" of the matrix
1773      is square and the user wishes to set the diagonal we use separate
1774      code so that MatSetValues() is not called for each diagonal entry,
1775      which would allocate new memory with many mallocs and slow things down.
1776
1777      Contributed by: Mathew Knepley
1778   */
1779   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1780   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1781   if (diag && (l->A->M == l->A->N)) {
1782     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1783   } else if (diag) {
1784     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1785     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1786       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1787 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1788     }
1789     for (i=0; i<slen; i++) {
1790       row  = lrows[i] + rstart_bs;
1791       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1792     }
1793     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1794     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   } else {
1796     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1797   }
1798
1799   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1800   ierr = PetscFree(lrows);CHKERRQ(ierr);
1801
1802   /* wait on sends */
1803   if (nsends) {
1804     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1805     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1806     ierr = PetscFree(send_status);CHKERRQ(ierr);
1807   }
1808   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1809   ierr = PetscFree(svalues);CHKERRQ(ierr);
1810
1811   PetscFunctionReturn(0);
1812 }
1813
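/*
   Editor's note: a minimal usage sketch (not part of the original source) of how the
   routine above is normally reached through the public MatZeroRows() interface; the
   matrix A, the row indices, and the diagonal value are illustrative assumptions.

.vb
      IS          is;
      int         rows[2] = {0,5};
      PetscScalar one = 1.0;

      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);  /* zero global rows 0 and 5, put 1.0 on their diagonals */
      ierr = ISDestroy(is);CHKERRQ(ierr);
.ve
*/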
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1816 int MatPrintHelp_MPIBAIJ(Mat A)
1817 {
1818   Mat_MPIBAIJ *a    = (Mat_MPIBAIJ*)A->data;
1819   MPI_Comm    comm  = A->comm;
1820   static int  called = 0;
1821   int         ierr;
1822
1823   PetscFunctionBegin;
1824   if (!a->rank) {
1825     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1826   }
1827   if (called) {PetscFunctionReturn(0);} else called = 1;
1828   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1829   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1830   PetscFunctionReturn(0);
1831 }
1832
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1835 int MatSetUnfactored_MPIBAIJ(Mat A)
1836 {
1837   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1838   int         ierr;
1839
1840   PetscFunctionBegin;
1841   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844
1845 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1846
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatEqual_MPIBAIJ"
1849 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1850 {
1851   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1852   Mat         a,b,c,d;
1853   PetscTruth  flg;
1854   int         ierr;
1855
1856   PetscFunctionBegin;
1857   a = matA->A; b = matA->B;
1858   c = matB->A; d = matB->B;
1859
1860   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1861   if (flg == PETSC_TRUE) {
1862     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1863   }
1864   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1865   PetscFunctionReturn(0);
1866 }
1867
1868
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1871 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1872 {
1873   int ierr;
1874
1875   PetscFunctionBegin;
1876   ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879
1880 /* -------------------------------------------------------------------*/
1881 static struct _MatOps MatOps_Values = {
1882        MatSetValues_MPIBAIJ,
1883        MatGetRow_MPIBAIJ,
1884        MatRestoreRow_MPIBAIJ,
1885        MatMult_MPIBAIJ,
1886 /* 4*/ MatMultAdd_MPIBAIJ,
1887        MatMultTranspose_MPIBAIJ,
1888        MatMultTransposeAdd_MPIBAIJ,
1889        0,
1890        0,
1891        0,
1892 /*10*/ 0,
1893        0,
1894        0,
1895        0,
1896        MatTranspose_MPIBAIJ,
1897 /*15*/ MatGetInfo_MPIBAIJ,
1898        MatEqual_MPIBAIJ,
1899        MatGetDiagonal_MPIBAIJ,
1900        MatDiagonalScale_MPIBAIJ,
1901        MatNorm_MPIBAIJ,
1902 /*20*/ MatAssemblyBegin_MPIBAIJ,
1903        MatAssemblyEnd_MPIBAIJ,
1904        0,
1905        MatSetOption_MPIBAIJ,
1906        MatZeroEntries_MPIBAIJ,
1907 /*25*/ MatZeroRows_MPIBAIJ,
1908        0,
1909        0,
1910        0,
1911        0,
1912 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1913        0,
1914        0,
1915        0,
1916        0,
1917 /*35*/ MatDuplicate_MPIBAIJ,
1918        0,
1919        0,
1920        0,
1921        0,
1922 /*40*/ 0,
1923        MatGetSubMatrices_MPIBAIJ,
1924        MatIncreaseOverlap_MPIBAIJ,
1925        MatGetValues_MPIBAIJ,
1926        0,
1927 /*45*/ MatPrintHelp_MPIBAIJ,
1928        MatScale_MPIBAIJ,
1929        0,
1930        0,
1931        0,
1932 /*50*/ MatGetBlockSize_MPIBAIJ,
1933        0,
1934        0,
1935        0,
1936        0,
1937 /*55*/ 0,
1938        0,
1939        MatSetUnfactored_MPIBAIJ,
1940        0,
1941        MatSetValuesBlocked_MPIBAIJ,
1942 /*60*/ 0,
1943        MatDestroy_MPIBAIJ,
1944        MatView_MPIBAIJ,
1945        MatGetPetscMaps_Petsc,
1946        0,
1947 /*65*/ 0,
1948        0,
1949        0,
1950        0,
1951        0,
1952 /*70*/ MatGetRowMax_MPIBAIJ,
1953        0,
1954        0,
1955        0,
1956        0,
1957 /*75*/ 0,
1958        0,
1959        0,
1960        0,
1961        0,
1962 /*80*/ 0,
1963        0,
1964        0,
1965        0,
/*84*/ 0,
1966 /*85*/ MatLoad_MPIBAIJ
1967 };
1968
1969
1970 EXTERN_C_BEGIN
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1973 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1974 {
1975   PetscFunctionBegin;
1976   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1977   *iscopy = PETSC_FALSE;
1978   PetscFunctionReturn(0);
1979 }
1980 EXTERN_C_END
1981
1982 EXTERN_C_BEGIN
1983 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
1984 EXTERN_C_END
1985
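/*
   Editor's note: the _MPIBAIJ routines referenced in MatOps_Values above are reached
   through PETSc's usual function-table dispatch, while the extra entry points below
   (preallocation, hash-table factor, ...) are composed onto the object by name in
   MatCreate_MPIBAIJ() and looked up at call time.  A sketch of that query pattern,
   with "SomeOp_C" standing in for a hypothetical composed-function name:

.vb
      int (*f)(Mat);
      ierr = PetscObjectQueryFunction((PetscObject)mat,"SomeOp_C",(void (**)(void))&f);CHKERRQ(ierr);
      if (f) {
        ierr = (*f)(mat);CHKERRQ(ierr);   /* run the type-specific implementation if it exists */
      }
.ve
*/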
"MatMPIBAIJSetPreallocation_MPIBAIJ" 1989 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1990 { 1991 Mat_MPIBAIJ *b; 1992 int ierr,i; 1993 1994 PetscFunctionBegin; 1995 B->preallocated = PETSC_TRUE; 1996 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1997 1998 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1999 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2000 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2001 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2002 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2003 if (d_nnz) { 2004 for (i=0; i<B->m/bs; i++) { 2005 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2006 } 2007 } 2008 if (o_nnz) { 2009 for (i=0; i<B->m/bs; i++) { 2010 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2011 } 2012 } 2013 2014 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2015 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2016 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2017 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2018 2019 b = (Mat_MPIBAIJ*)B->data; 2020 b->bs = bs; 2021 b->bs2 = bs*bs; 2022 b->mbs = B->m/bs; 2023 b->nbs = B->n/bs; 2024 b->Mbs = B->M/bs; 2025 b->Nbs = B->N/bs; 2026 2027 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2028 b->rowners[0] = 0; 2029 for (i=2; i<=b->size; i++) { 2030 b->rowners[i] += b->rowners[i-1]; 2031 } 2032 b->rstart = b->rowners[b->rank]; 2033 b->rend = b->rowners[b->rank+1]; 2034 2035 ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2036 b->cowners[0] = 0; 2037 for (i=2; i<=b->size; i++) { 2038 b->cowners[i] += b->cowners[i-1]; 2039 } 2040 b->cstart = b->cowners[b->rank]; 2041 b->cend = b->cowners[b->rank+1]; 2042 2043 for (i=0; i<=b->size; i++) { 2044 b->rowners_bs[i] = b->rowners[i]*bs; 2045 } 2046 b->rstart_bs = b->rstart*bs; 2047 b->rend_bs = b->rend*bs; 2048 b->cstart_bs = b->cstart*bs; 2049 b->cend_bs = b->cend*bs; 2050 2051 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2052 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2053 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2054 PetscLogObjectParent(B,b->A); 2055 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2056 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2057 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2058 PetscLogObjectParent(B,b->B); 2059 2060 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2061 2062 PetscFunctionReturn(0); 2063 } 2064 EXTERN_C_END 2065 2066 EXTERN_C_BEGIN 2067 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2068 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2069 EXTERN_C_END 2070 2071 /*MC 2072 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2073 2074 Options Database Keys: 2075 . 
-mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2076 2077 Level: beginner 2078 2079 .seealso: MatCreateMPIBAIJ 2080 M*/ 2081 2082 EXTERN_C_BEGIN 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatCreate_MPIBAIJ" 2085 int MatCreate_MPIBAIJ(Mat B) 2086 { 2087 Mat_MPIBAIJ *b; 2088 int ierr; 2089 PetscTruth flg; 2090 2091 PetscFunctionBegin; 2092 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2093 B->data = (void*)b; 2094 2095 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2096 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2097 B->mapping = 0; 2098 B->factor = 0; 2099 B->assembled = PETSC_FALSE; 2100 2101 B->insertmode = NOT_SET_VALUES; 2102 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2103 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2104 2105 /* build local table of row and column ownerships */ 2106 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2107 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2108 b->cowners = b->rowners + b->size + 2; 2109 b->rowners_bs = b->cowners + b->size + 2; 2110 2111 /* build cache for off array entries formed */ 2112 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2113 b->donotstash = PETSC_FALSE; 2114 b->colmap = PETSC_NULL; 2115 b->garray = PETSC_NULL; 2116 b->roworiented = PETSC_TRUE; 2117 2118 #if defined(PETSC_USE_MAT_SINGLE) 2119 /* stuff for MatSetValues_XXX in single precision */ 2120 b->setvalueslen = 0; 2121 b->setvaluescopy = PETSC_NULL; 2122 #endif 2123 2124 /* stuff used in block assembly */ 2125 b->barray = 0; 2126 2127 /* stuff used for matrix vector multiply */ 2128 b->lvec = 0; 2129 b->Mvctx = 0; 2130 2131 /* stuff for MatGetRow() */ 2132 b->rowindices = 0; 2133 b->rowvalues = 0; 2134 b->getrowactive = PETSC_FALSE; 2135 2136 /* hash table stuff */ 2137 b->ht = 0; 2138 b->hd = 0; 2139 b->ht_size = 0; 2140 b->ht_flag = PETSC_FALSE; 2141 b->ht_fact = 0; 2142 b->ht_total_ct = 0; 2143 b->ht_insert_ct = 0; 2144 2145 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2146 if (flg) { 2147 PetscReal fact = 1.39; 2148 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2149 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2150 if (fact <= 1.0) fact = 1.39; 2151 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2152 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2153 } 2154 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2155 "MatStoreValues_MPIBAIJ", 2156 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2157 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2158 "MatRetrieveValues_MPIBAIJ", 2159 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2160 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2161 "MatGetDiagonalBlock_MPIBAIJ", 2162 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2163 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2164 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2165 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2166 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2167 "MatDiagonalScaleLocal_MPIBAIJ", 2168 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2169 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2170 "MatSetHashTableFactor_MPIBAIJ", 2171 
MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 EXTERN_C_END
2175
2176 /*MC
2177    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2178
2179    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2180    and MATMPIBAIJ otherwise.
2181
2182    Options Database Keys:
2183 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2184
2185    Level: beginner
2186
2187 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2188 M*/
2189
2190 EXTERN_C_BEGIN
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatCreate_BAIJ"
2193 int MatCreate_BAIJ(Mat A) {
2194   int ierr,size;
2195
2196   PetscFunctionBegin;
2197   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2198   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2199   if (size == 1) {
2200     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2201   } else {
2202     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2203   }
2204   PetscFunctionReturn(0);
2205 }
2206 EXTERN_C_END
2207
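/*
   Editor's note: a minimal usage sketch (sizes illustrative, error handling abbreviated)
   of the generic MATBAIJ type defined above; MatSetType() resolves it to MATSEQBAIJ or
   MATMPIBAIJ depending on the communicator size, and MatSetFromOptions() lets
   -mat_type override the choice at run time.

.vb
      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve
*/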
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2210 /*@C
2211    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2212    (block compressed row).  For good matrix assembly performance
2213    the user should preallocate the matrix storage by setting the parameters
2214    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2215    performance can be increased by more than a factor of 50.
2216
2217    Collective on Mat
2218
2219    Input Parameters:
2220 +  A - the matrix
2221 .  bs - size of block
2222 .  d_nz - number of block nonzeros per block row in diagonal portion of local
2223            submatrix (same for all local rows)
2224 .  d_nnz - array containing the number of block nonzeros in the various block rows
2225            in the diagonal portion of the local submatrix (possibly different for each block
2226            row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2227 .  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2228            submatrix (same for all local rows).
2229 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2230            off-diagonal portion of the local submatrix (possibly different for
2231            each block row) or PETSC_NULL.
2232
2233
2234
2235
2236    Options Database Keys:
2237 .  -mat_no_unroll - uses code that does not unroll the loops in the
2238                      block calculations (much slower)
2239 .  -mat_block_size - size of the blocks to use
2240
2241    Notes:
2242    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2243    then it must be used on all processors that share the object for that argument.
2244
2245    Storage Information:
2246    For a square global matrix we define each processor's diagonal portion
2247    to be its local rows and the corresponding columns (a square submatrix);
2248    each processor's off-diagonal portion encompasses the remainder of the
2249    local matrix (a rectangular submatrix).
2250
2251    The user can specify preallocated storage for the diagonal part of
2252    the local submatrix with either d_nz or d_nnz (not both).  Set
2253    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2254    memory allocation.  Likewise, specify preallocated storage for the
2255    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2256
2257    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2258    the figure below we depict these three local rows and all columns (0-11).
2259
2260 .vb
2261             0 1 2 3 4 5 6 7 8 9 10 11
2262            -------------------------
2263    row 3  |  o o o d d d o o o o o o
2264    row 4  |  o o o d d d o o o o o o
2265    row 5  |  o o o d d d o o o o o o
2266            -------------------------
2267 .ve
2268
2269    Thus, any entries in the d locations are stored in the d (diagonal)
2270    submatrix, and any entries in the o locations are stored in the
2271    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2272    stored simply in the MATSEQBAIJ format for compressed row storage.
2273
2274    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2275    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2276    In general, for PDE problems in which most nonzeros are near the diagonal,
2277    one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
2278    or you will get TERRIBLE performance; see the users' manual chapter on
2279    matrices.
2280
2281    Level: intermediate
2282
2283 .keywords: matrix, block, aij, compressed row, sparse, parallel
2284
2285 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2286 @*/
2287 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2288 {
2289   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2290
2291   PetscFunctionBegin;
2292   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2293   if (f) {
2294     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2295   }
2296   PetscFunctionReturn(0);
2297 }
2298
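/*
   Editor's note: a small preallocation sketch (not from the original source; all sizes
   and counts are illustrative).  With block size 2 and three local block rows, d_nnz
   and o_nnz give per-block-row counts of nonzero blocks in the diagonal and
   off-diagonal portions respectively:

.vb
      Mat A;
      int d_nnz[3] = {3,2,3},o_nnz[3] = {1,0,1};

      ierr = MatCreate(PETSC_COMM_WORLD,6,6,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
*/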
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatCreateMPIBAIJ"
2301 /*@C
2302    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2303    (block compressed row).  For good matrix assembly performance
2304    the user should preallocate the matrix storage by setting the parameters
2305    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2306    performance can be increased by more than a factor of 50.
2307
2308    Collective on MPI_Comm
2309
2310    Input Parameters:
2311 +  comm - MPI communicator
2312 .  bs - size of block
2313 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2314        This value should be the same as the local size used in creating the
2315        y vector for the matrix-vector product y = Ax.
2316 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2317        This value should be the same as the local size used in creating the
2318        x vector for the matrix-vector product y = Ax.
2319 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2320 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2321 .  d_nz - number of nonzero blocks per block row in diagonal portion of local
2322            submatrix (same for all local rows)
2323 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2324            in the diagonal portion of the local submatrix (possibly different for each block
2325            row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2326 .  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
2327            submatrix (same for all local rows).
2328 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2329            off-diagonal portion of the local submatrix (possibly different for
2330            each block row) or PETSC_NULL.
2331
2332    Output Parameter:
2333 .  A - the matrix
2334
2335    Options Database Keys:
2336 .  -mat_no_unroll - uses code that does not unroll the loops in the
2337                      block calculations (much slower)
2338 .  -mat_block_size - size of the blocks to use
2339
2340    Notes:
2341    A nonzero block is any block that has 1 or more nonzeros in it
2342
2343    The user MUST specify either the local or global matrix dimensions
2344    (possibly both).
2345
2346    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2347    then it must be used on all processors that share the object for that argument.
2348
2349    Storage Information:
2350    For a square global matrix we define each processor's diagonal portion
2351    to be its local rows and the corresponding columns (a square submatrix);
2352    each processor's off-diagonal portion encompasses the remainder of the
2353    local matrix (a rectangular submatrix).
2354
2355    The user can specify preallocated storage for the diagonal part of
2356    the local submatrix with either d_nz or d_nnz (not both).  Set
2357    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2358    memory allocation.  Likewise, specify preallocated storage for the
2359    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2360
2361    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2362    the figure below we depict these three local rows and all columns (0-11).
2363
2364 .vb
2365             0 1 2 3 4 5 6 7 8 9 10 11
2366            -------------------------
2367    row 3  |  o o o d d d o o o o o o
2368    row 4  |  o o o d d d o o o o o o
2369    row 5  |  o o o d d d o o o o o o
2370            -------------------------
2371 .ve
2372
2373    Thus, any entries in the d locations are stored in the d (diagonal)
2374    submatrix, and any entries in the o locations are stored in the
2375    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2376    stored simply in the MATSEQBAIJ format for compressed row storage.
2377
2378    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2379    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2380    In general, for PDE problems in which most nonzeros are near the diagonal,
2381    one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
2382    or you will get TERRIBLE performance; see the users' manual chapter on
2383    matrices.
2384 2385 Level: intermediate 2386 2387 .keywords: matrix, block, aij, compressed row, sparse, parallel 2388 2389 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2390 @*/ 2391 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2392 { 2393 int ierr,size; 2394 2395 PetscFunctionBegin; 2396 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2397 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2398 if (size > 1) { 2399 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2400 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2401 } else { 2402 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2403 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2404 } 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2410 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2411 { 2412 Mat mat; 2413 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2414 int ierr,len=0; 2415 2416 PetscFunctionBegin; 2417 *newmat = 0; 2418 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2419 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2420 2421 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2422 mat->factor = matin->factor; 2423 mat->preallocated = PETSC_TRUE; 2424 mat->assembled = PETSC_TRUE; 2425 mat->insertmode = NOT_SET_VALUES; 2426 2427 a = (Mat_MPIBAIJ*)mat->data; 2428 a->bs = oldmat->bs; 2429 a->bs2 = oldmat->bs2; 2430 a->mbs = oldmat->mbs; 2431 a->nbs = oldmat->nbs; 2432 a->Mbs = oldmat->Mbs; 2433 a->Nbs = oldmat->Nbs; 2434 2435 a->rstart = oldmat->rstart; 2436 a->rend = oldmat->rend; 2437 a->cstart = oldmat->cstart; 2438 a->cend = oldmat->cend; 2439 a->size = oldmat->size; 2440 a->rank = oldmat->rank; 2441 a->donotstash = oldmat->donotstash; 2442 a->roworiented = oldmat->roworiented; 2443 a->rowindices = 0; 2444 a->rowvalues = 0; 2445 a->getrowactive = PETSC_FALSE; 2446 a->barray = 0; 2447 a->rstart_bs = oldmat->rstart_bs; 2448 a->rend_bs = oldmat->rend_bs; 2449 a->cstart_bs = oldmat->cstart_bs; 2450 a->cend_bs = oldmat->cend_bs; 2451 2452 /* hash table stuff */ 2453 a->ht = 0; 2454 a->hd = 0; 2455 a->ht_size = 0; 2456 a->ht_flag = oldmat->ht_flag; 2457 a->ht_fact = oldmat->ht_fact; 2458 a->ht_total_ct = 0; 2459 a->ht_insert_ct = 0; 2460 2461 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2462 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2463 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2464 if (oldmat->colmap) { 2465 #if defined (PETSC_USE_CTABLE) 2466 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2467 #else 2468 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2469 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2470 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2471 #endif 2472 } else a->colmap = 0; 2473 2474 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2475 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2476 PetscLogObjectMemory(mat,len*sizeof(int)); 2477 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2478 } else a->garray = 0; 2479 2480 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2481 PetscLogObjectParent(mat,a->lvec); 2482 ierr = 
VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2483 PetscLogObjectParent(mat,a->Mvctx); 2484 2485 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2486 PetscLogObjectParent(mat,a->A); 2487 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2488 PetscLogObjectParent(mat,a->B); 2489 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2490 *newmat = mat; 2491 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #include "petscsys.h" 2496 2497 #undef __FUNCT__ 2498 #define __FUNCT__ "MatLoad_MPIBAIJ" 2499 int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2500 { 2501 Mat A; 2502 int i,nz,ierr,j,rstart,rend,fd; 2503 PetscScalar *vals,*buf; 2504 MPI_Comm comm = ((PetscObject)viewer)->comm; 2505 MPI_Status status; 2506 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2507 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2508 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2509 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2510 int dcount,kmax,k,nzcount,tmp; 2511 2512 PetscFunctionBegin; 2513 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2514 2515 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2516 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2517 if (!rank) { 2518 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2519 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2520 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2521 if (header[3] < 0) { 2522 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2523 } 2524 } 2525 2526 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2527 M = header[1]; N = header[2]; 2528 2529 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2530 2531 /* 2532 This code adds extra rows to make sure the number of rows is 2533 divisible by the blocksize 2534 */ 2535 Mbs = M/bs; 2536 extra_rows = bs - M + bs*(Mbs); 2537 if (extra_rows == bs) extra_rows = 0; 2538 else Mbs++; 2539 if (extra_rows &&!rank) { 2540 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2541 } 2542 2543 /* determine ownership of all rows */ 2544 mbs = Mbs/size + ((Mbs % size) > rank); 2545 m = mbs*bs; 2546 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2547 browners = rowners + size + 1; 2548 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2549 rowners[0] = 0; 2550 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2551 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2552 rstart = rowners[rank]; 2553 rend = rowners[rank+1]; 2554 2555 /* distribute row lengths to all processors */ 2556 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2557 if (!rank) { 2558 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2559 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2560 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2561 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2562 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2563 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2564 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2565 } else { 2566 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2567 } 2568 2569 
if (!rank) {
2570     /* calculate the number of nonzeros on each processor */
2571     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2572     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2573     for (i=0; i<size; i++) {
2574       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2575         procsnz[i] += rowlengths[j];
2576       }
2577     }
2578     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2579
2580     /* determine max buffer needed and allocate it */
2581     maxnz = 0;
2582     for (i=0; i<size; i++) {
2583       maxnz = PetscMax(maxnz,procsnz[i]);
2584     }
2585     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2586
2587     /* read in my part of the matrix column indices */
2588     nz     = procsnz[0];
2589     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2590     mycols = ibuf;
2591     if (size == 1) nz -= extra_rows;
2592     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2593     if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2594
2595     /* read in the column indices for all other processors (except the last) and ship them off */
2596     for (i=1; i<size-1; i++) {
2597       nz   = procsnz[i];
2598       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2599       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2600     }
2601     /* read in the stuff for the last proc */
2602     if (size != 1) {
2603       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2604       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2605       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2606       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2607     }
2608     ierr = PetscFree(cols);CHKERRQ(ierr);
2609   } else {
2610     /* determine buffer space needed for message */
2611     nz = 0;
2612     for (i=0; i<m; i++) {
2613       nz += locrowlens[i];
2614     }
2615     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2616     mycols = ibuf;
2617     /* receive message of column indices */
2618     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2619     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2620     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2621   }
2622
2623   /* loop over local rows, determining number of off diagonal entries */
2624   ierr   = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2625   odlens = dlens + (rend-rstart);
2626   ierr   = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2627   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2628   masked1 = mask + Mbs;
2629   masked2 = masked1 + Mbs;
2630   rowcount = 0; nzcount = 0;
2631   for (i=0; i<mbs; i++) {
2632     dcount  = 0;
2633     odcount = 0;
2634     for (j=0; j<bs; j++) {
2635       kmax = locrowlens[rowcount];
2636       for (k=0; k<kmax; k++) {
2637         tmp = mycols[nzcount++]/bs;
2638         if (!mask[tmp]) {
2639           mask[tmp] = 1;
2640           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2641           else masked1[dcount++] = tmp;
2642         }
2643       }
2644       rowcount++;
2645     }
2646
2647     dlens[i]  = dcount;
2648     odlens[i] = odcount;
2649
2650     /* zero out the mask elements we set */
2651     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2652     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2653   }
2654
2655   /* create our matrix */
2656   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2657   ierr = MatSetType(A,type);CHKERRQ(ierr);
2658   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2659
2660   /* Why isn't this called as ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); ? */
2661   MatSetOption(A,MAT_COLUMNS_SORTED);
2662
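  /*
     Editor's note: the value distribution below repeats the master/worker pattern used
     above for the column indices: process 0 reads each processor's chunk of numerical
     values from the file and ships it with MPI_Send(), while every other process posts
     the matching MPI_Recv() and then inserts its rows locally with MatSetValues().
  */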
2663   if (!rank) {
2664     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2665     /* read in my part of the matrix numerical values */
2666     nz     = procsnz[0];
2667     vals   = buf;
2668     mycols = ibuf;
2669     if (size == 1) nz -= extra_rows;
2670     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2671     if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2672
2673     /* insert into matrix */
2674     jj = rstart*bs;
2675     for (i=0; i<m; i++) {
2676       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2677       mycols += locrowlens[i];
2678       vals   += locrowlens[i];
2679       jj++;
2680     }
2681     /* read in other processors (except the last one) and ship out */
2682     for (i=1; i<size-1; i++) {
2683       nz   = procsnz[i];
2684       vals = buf;
2685       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2686       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2687     }
2688     /* the last proc */
2689     if (size != 1) {
2690       nz   = procsnz[size-1] - extra_rows;
2691       vals = buf;
2692       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2693       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2694       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2695     }
2696     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2697   } else {
2698     /* receive numeric values */
2699     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2700
2701     /* receive message of values */
2702     vals   = buf;
2703     mycols = ibuf;
2704     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2705     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2706     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2707
2708     /* insert into matrix */
2709     jj = rstart*bs;
2710     for (i=0; i<m; i++) {
2711       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2712       mycols += locrowlens[i];
2713       vals   += locrowlens[i];
2714       jj++;
2715     }
2716   }
2717   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2718   ierr = PetscFree(buf);CHKERRQ(ierr);
2719   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2720   ierr = PetscFree(rowners);CHKERRQ(ierr);
2721   ierr = PetscFree(dlens);CHKERRQ(ierr);
2722   ierr = PetscFree(mask);CHKERRQ(ierr);
2723   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2724   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725
2726   *newmat = A;
2727   PetscFunctionReturn(0);
2728 }
2729
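/*
   Editor's note: a usage sketch for the loader above (illustrative only: "matrix.dat"
   is a hypothetical file name, and the exact binary-viewer open mode flag varies with
   the PETSc version in use):

.vb
      PetscViewer viewer;
      Mat         A;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);   /* -matload_block_size <bs> selects the block size */
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
.ve
*/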
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2732 /*@
2733    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2734
2735    Input Parameters:
2736 +  mat - the matrix
2737 -  fact - factor
2738
2739    Collective on Mat
2740
2741    Level: advanced
2742
2743    Notes:
2744    This can also be set by the command line option: -mat_use_hash_table fact
2745
2746 .keywords: matrix, hashtable, factor, HT
2747
2748 .seealso: MatSetOption()
2749 @*/
2750 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2751 {
2752   int ierr,(*f)(Mat,PetscReal);
2753
2754   PetscFunctionBegin;
2755   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2756   if (f) {
2757     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2758   }
2759   PetscFunctionReturn(0);
2760 }
2761
2762 #undef __FUNCT__
2763 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2764 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2765 {
2766   Mat_MPIBAIJ *baij;
2767
2768   PetscFunctionBegin;
2769   baij          = (Mat_MPIBAIJ*)mat->data;
2770   baij->ht_fact = fact;
2771   PetscFunctionReturn(0);
2772 }
2773
2774 #undef __FUNCT__
2775 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2776 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2777 {
2778   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2779   PetscFunctionBegin;
2780   *Ad     = a->A;
2781   *Ao     = a->B;
2782   *colmap = a->garray;
2783   PetscFunctionReturn(0);
2784 }
2785
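/*
   Editor's note: a closing usage sketch (assumes A is an assembled MATMPIBAIJ matrix;
   names are illustrative).  MatMPIBAIJGetSeqBAIJ() exposes the two local MATSEQBAIJ
   pieces and the map from local off-diagonal block columns to global block columns:

.vb
      Mat Ad,Ao;
      int *garray;

      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
      /* Ad is the diagonal block, Ao the off-diagonal block; garray[j] is the
         global block column corresponding to local block column j of Ao */
.ve
*/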