1 2 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 3 4 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 5 EXTERN int DisAssemble_MPIBAIJ(Mat); 6 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int); 7 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 8 EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []); 9 EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode); 10 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 11 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 12 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]); 13 EXTERN int MatPrintHelp_SeqBAIJ(Mat); 14 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 25 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 26 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 27 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 28 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39 int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 int ierr,i; 43 PetscScalar *va,*vb; 44 Vec vtmp; 45 46 PetscFunctionBegin; 47 48 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 49 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 50 51 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 52 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 53 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 54 55 for (i=0; i<A->m; i++){ 56 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 57 } 58 59 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 60 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 61 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 62 63 PetscFunctionReturn(0); 64 } 65 66 EXTERN_C_BEGIN 67 #undef __FUNCT__ 68 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 69 int MatStoreValues_MPIBAIJ(Mat mat) 70 { 71 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 72 int ierr; 73 74 PetscFunctionBegin; 75 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 76 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 EXTERN_C_END 80 81 EXTERN_C_BEGIN 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 84 int 
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,i;
  PetscScalar *va,*vb;
  Vec         vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
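/*
   Example of the mapping (comment only): if this process's off-diagonal
   block touches global block columns garray = {3,7,12}, then
       colmap[3] = 0*bs+1,  colmap[7] = 1*bs+1,  colmap[12] = 2*bs+1,
   and a global point column c is translated by
       col = colmap[c/bs] - 1 + c%bs;       (array version)
   The +1/-1 shift lets a stored 0 (left by PetscMemzero) mean "block
   column not present on this process".
*/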
#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
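/*
   What the two macros above do (illustrative sketch only): for an entry
   (row,col) they search block row brow = row/bs for block column
   bcol = col/bs -- a short bisection narrows [low,high) and a linear scan
   finishes, e.g. looking for bcol = 5 in rp = {1,3,5,9} lands on rp[2] and
   the value goes into that bs*bs block at offset bs*cidx+ridx.  If the
   block column is absent and new nonzeros are allowed, the whole matrix
   storage is reallocated with CHUNKSIZE extra block slots for this row and
   the trailing entries of the row are shifted up one slot.
*/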
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
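/*
   Dispatch logic of the routine above, in outline (comment only):

     im[i] in [rstart_bs,rend_bs)      ->  row is owned locally
        in[j] in [cstart_bs,cend_bs)      -> diagonal block A
        otherwise                         -> off-diagonal block B (translated
                                             through colmap once assembled)
     otherwise                         ->  stashed, shipped to the owner at
                                           MatAssemblyBegin()/End() time
*/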
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
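/*
   HASH() is multiplicative hashing with the golden-ratio constant: the
   fractional part of key*0.6180339887 is scaled into [0,size).  For
   example (illustrative numbers only), key = 41 gives
       41*0.6180339887 = 25.339...,  fractional part 0.339...,
   so with size = 100 the starting slot is h1 = 33.  Collisions are then
   resolved by the linear-probing loops in the routines below.
*/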
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col;
  int             rstart=baij->rstart;
  int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
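/*
   For reference, the NORM_FROBENIUS branch above computes
       ||A||_F = sqrt( sum_{i,j} |a_ij|^2 )
   by accumulating |a_ij|^2 over the local A and B blocks, combining the
   partial sums across processes with MPI_Allreduce(), and taking the
   square root once.
*/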
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         A = baij->A,B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  MatScalar   **HD;
  PetscReal   tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;


  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);


  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
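/*
   Insertion above is plain open addressing: starting from h1 the table is
   probed at (h1+k)%size until a free slot is found, e.g. if slot 6 is
   taken the probe visits 7, 8, ... and wraps around to 0 (illustrative).
   Keys are stored shifted by +1 so that HT[idx] == 0 means "empty", and
   HD[idx] keeps the address of the corresponding bs2 block of values.
*/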
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
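/*
   For orientation, a typical user-side calling sequence is (sketch):

       ierr = MatSetValues(mat,...);CHKERRQ(ierr);        possibly off-process entries
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin_MPIBAIJ() above ships the stashed off-process entries;
   MatAssemblyEnd_MPIBAIJ() drains both stashes into the local A and B
   blocks, assembles them, and, if MAT_USE_HASH_TABLE was set, builds the
   hash table and switches mat->ops->setvalues/setvaluesblocked to the
   _HT variants.
*/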
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
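/*
   The product above is the standard two-block form
       y = A_diag * x_local + B_off * x_ghost,
   with the scatter that fills the ghost values (a->lvec) started before
   and finished after the purely local A_diag multiply, so communication
   overlaps computation; VecScatterPostRecvs() then pre-posts the receives
   for the next multiply.
*/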
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;
  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
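/*
   Merge performed above (sketch): the off-diagonal (B) part of a row is
   already sorted by global block column, so its entries with block column
   < cstart are copied first, then the whole diagonal (A) part shifted by
   cstart*bs, then the remaining B entries -- yielding one row sorted by
   increasing global column without an explicit sort.
*/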
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
  int         bs=baij->bs,mbs=baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
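/*
   Note on the loops above (rough sketch): each stored bs x bs block is
   re-inserted with the row and column index arguments exchanged -- one
   column index `col' against the bs row indices in `rvals' -- so every
   block lands at its transposed global position and B ends up as the
   transpose of A.
*/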
*/ 1648 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1649 } 1650 if (ll) { 1651 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1652 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1653 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1654 } 1655 /* scale the diagonal block */ 1656 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1657 1658 if (rr) { 1659 /* Do a scatter end and then right scale the off-diagonal block */ 1660 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1661 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1662 } 1663 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1669 int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1670 { 1671 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1672 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1673 int *nprocs,j,idx,nsends,row; 1674 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1675 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1676 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1677 MPI_Comm comm = A->comm; 1678 MPI_Request *send_waits,*recv_waits; 1679 MPI_Status recv_status,*send_status; 1680 IS istmp; 1681 PetscTruth found; 1682 1683 PetscFunctionBegin; 1684 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1685 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1686 1687 /* first count number of contributors to each processor */ 1688 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1689 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1690 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1691 for (i=0; i<N; i++) { 1692 idx = rows[i]; 1693 found = PETSC_FALSE; 1694 for (j=0; j<size; j++) { 1695 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1696 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1697 } 1698 } 1699 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1700 } 1701 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1702 1703 /* inform other processors of number of messages and max length*/ 1704 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1705 1706 /* post receives: */ 1707 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1708 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1709 for (i=0; i<nrecvs; i++) { 1710 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1711 } 1712 1713 /* do sends: 1714 1) starts[i] gives the starting index in svalues for stuff going to 1715 the ith processor 1716 */ 1717 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1718 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1719 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1720 starts[0] = 0; 1721 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1722 for (i=0; i<N; i++) { 1723 svalues[starts[owner[i]]++] = rows[i]; 1724 } 1725 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1726 1727 starts[0] = 0; 1728 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1729 count = 0; 1730 for (i=0; i<size; i++) { 1731 if (nprocs[2*i+1]) { 1732 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1733 } 1734 } 1735 ierr = 
PetscFree(starts);CHKERRQ(ierr); 1736 1737 base = owners[rank]*bs; 1738 1739 /* wait on receives */ 1740 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1741 source = lens + nrecvs; 1742 count = nrecvs; slen = 0; 1743 while (count) { 1744 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1745 /* unpack receives into our local space */ 1746 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1747 source[imdex] = recv_status.MPI_SOURCE; 1748 lens[imdex] = n; 1749 slen += n; 1750 count--; 1751 } 1752 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1753 1754 /* move the data into the send scatter */ 1755 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1756 count = 0; 1757 for (i=0; i<nrecvs; i++) { 1758 values = rvalues + i*nmax; 1759 for (j=0; j<lens[i]; j++) { 1760 lrows[count++] = values[j] - base; 1761 } 1762 } 1763 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1764 ierr = PetscFree(lens);CHKERRQ(ierr); 1765 ierr = PetscFree(owner);CHKERRQ(ierr); 1766 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1767 1768 /* actually zap the local rows */ 1769 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1770 PetscLogObjectParent(A,istmp); 1771 1772 /* 1773 Zero the required rows. If the "diagonal block" of the matrix 1774 is square and the user wishes to set the diagonal, we use separate 1775 code so that MatSetValues() is not called for each diagonal entry; 1776 that would allocate new memory, triggering many mallocs and slowing things down. 1777 1778 Contributed by: Mathew Knepley 1779 */ 1780 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1781 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1782 if (diag && (l->A->M == l->A->N)) { 1783 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1784 } else if (diag) { 1785 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1786 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1787 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1788 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1789 } 1790 for (i=0; i<slen; i++) { 1791 row = lrows[i] + rstart_bs; 1792 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 } else { 1797 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1798 } 1799 1800 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1801 ierr = PetscFree(lrows);CHKERRQ(ierr); 1802 1803 /* wait on sends */ 1804 if (nsends) { 1805 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1806 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1807 ierr = PetscFree(send_status);CHKERRQ(ierr); 1808 } 1809 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1810 ierr = PetscFree(svalues);CHKERRQ(ierr); 1811 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1817 int MatPrintHelp_MPIBAIJ(Mat A) 1818 { 1819 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1820 MPI_Comm comm = A->comm; 1821 static int called = 0; 1822 int ierr; 1823 1824 PetscFunctionBegin; 1825 if (!a->rank) { 1826 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1827 } 1828 if (called) {PetscFunctionReturn(0);} else called = 1; 1829 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1830 ierr =
(*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use a hash table for efficient matrix assembly\n");CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1836 int MatSetUnfactored_MPIBAIJ(Mat A) 1837 { 1838 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1839 int ierr; 1840 1841 PetscFunctionBegin; 1842 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 1846 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatEqual_MPIBAIJ" 1850 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1851 { 1852 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1853 Mat a,b,c,d; 1854 PetscTruth flg; 1855 int ierr; 1856 1857 PetscFunctionBegin; 1858 a = matA->A; b = matA->B; 1859 c = matB->A; d = matB->B; 1860 1861 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1862 if (flg == PETSC_TRUE) { 1863 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1864 } 1865 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1866 PetscFunctionReturn(0); 1867 } 1868 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1872 int MatSetUpPreallocation_MPIBAIJ(Mat A) 1873 { 1874 int ierr; 1875 1876 PetscFunctionBegin; 1877 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 /* -------------------------------------------------------------------*/ 1882 static struct _MatOps MatOps_Values = { 1883 MatSetValues_MPIBAIJ, 1884 MatGetRow_MPIBAIJ, 1885 MatRestoreRow_MPIBAIJ, 1886 MatMult_MPIBAIJ, 1887 /* 4*/ MatMultAdd_MPIBAIJ, 1888 MatMultTranspose_MPIBAIJ, 1889 MatMultTransposeAdd_MPIBAIJ, 1890 0, 1891 0, 1892 0, 1893 /*10*/ 0, 1894 0, 1895 0, 1896 0, 1897 MatTranspose_MPIBAIJ, 1898 /*15*/ MatGetInfo_MPIBAIJ, 1899 MatEqual_MPIBAIJ, 1900 MatGetDiagonal_MPIBAIJ, 1901 MatDiagonalScale_MPIBAIJ, 1902 MatNorm_MPIBAIJ, 1903 /*20*/ MatAssemblyBegin_MPIBAIJ, 1904 MatAssemblyEnd_MPIBAIJ, 1905 0, 1906 MatSetOption_MPIBAIJ, 1907 MatZeroEntries_MPIBAIJ, 1908 /*25*/ MatZeroRows_MPIBAIJ, 1909 0, 1910 0, 1911 0, 1912 0, 1913 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1914 0, 1915 0, 1916 0, 1917 0, 1918 /*35*/ MatDuplicate_MPIBAIJ, 1919 0, 1920 0, 1921 0, 1922 0, 1923 /*40*/ 0, 1924 MatGetSubMatrices_MPIBAIJ, 1925 MatIncreaseOverlap_MPIBAIJ, 1926 MatGetValues_MPIBAIJ, 1927 0, 1928 /*45*/ MatPrintHelp_MPIBAIJ, 1929 MatScale_MPIBAIJ, 1930 0, 1931 0, 1932 0, 1933 /*50*/ MatGetBlockSize_MPIBAIJ, 1934 0, 1935 0, 1936 0, 1937 0, 1938 /*55*/ 0, 1939 0, 1940 MatSetUnfactored_MPIBAIJ, 1941 0, 1942 MatSetValuesBlocked_MPIBAIJ, 1943 /*60*/ 0, 1944 MatDestroy_MPIBAIJ, 1945 MatView_MPIBAIJ, 1946 MatGetPetscMaps_Petsc, 1947 0, 1948 /*65*/ 0, 1949 0, 1950 0, 1951 0, 1952 0, 1953 /*70*/ MatGetRowMax_MPIBAIJ, 1954 0, 1955 0, 1956 0, 1957 0, 1958 /*75*/ 0, 1959 0, 1960 0, 1961 0, 1962 0, 1963 /*80*/ 0, 1964 0, 1965 0, 1966 0, 1967 0, /*85*/ MatLoad_MPIBAIJ 1968 }; 1969 1970 1971 EXTERN_C_BEGIN 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1974 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1975 { 1976 PetscFunctionBegin; 1977 *a = ((Mat_MPIBAIJ *)A->data)->A; 1978 *iscopy = PETSC_FALSE; 1979 PetscFunctionReturn(0); 1980 } 1981 EXTERN_C_END 1982 1983 EXTERN_C_BEGIN 1984 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 1985 EXTERN_C_END 1986 1987 EXTERN_C_BEGIN 1988 #undef __FUNCT__ 1989 #define __FUNCT__
"MatMPIBAIJSetPreallocation_MPIBAIJ" 1990 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1991 { 1992 Mat_MPIBAIJ *b; 1993 int ierr,i; 1994 1995 PetscFunctionBegin; 1996 B->preallocated = PETSC_TRUE; 1997 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1998 1999 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2000 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2001 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2002 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2003 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2004 if (d_nnz) { 2005 for (i=0; i<B->m/bs; i++) { 2006 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2007 } 2008 } 2009 if (o_nnz) { 2010 for (i=0; i<B->m/bs; i++) { 2011 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2012 } 2013 } 2014 2015 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2016 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2017 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2018 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2019 2020 b = (Mat_MPIBAIJ*)B->data; 2021 b->bs = bs; 2022 b->bs2 = bs*bs; 2023 b->mbs = B->m/bs; 2024 b->nbs = B->n/bs; 2025 b->Mbs = B->M/bs; 2026 b->Nbs = B->N/bs; 2027 2028 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2029 b->rowners[0] = 0; 2030 for (i=2; i<=b->size; i++) { 2031 b->rowners[i] += b->rowners[i-1]; 2032 } 2033 b->rstart = b->rowners[b->rank]; 2034 b->rend = b->rowners[b->rank+1]; 2035 2036 ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2037 b->cowners[0] = 0; 2038 for (i=2; i<=b->size; i++) { 2039 b->cowners[i] += b->cowners[i-1]; 2040 } 2041 b->cstart = b->cowners[b->rank]; 2042 b->cend = b->cowners[b->rank+1]; 2043 2044 for (i=0; i<=b->size; i++) { 2045 b->rowners_bs[i] = b->rowners[i]*bs; 2046 } 2047 b->rstart_bs = b->rstart*bs; 2048 b->rend_bs = b->rend*bs; 2049 b->cstart_bs = b->cstart*bs; 2050 b->cend_bs = b->cend*bs; 2051 2052 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2053 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2054 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2055 PetscLogObjectParent(B,b->A); 2056 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2057 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2058 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2059 PetscLogObjectParent(B,b->B); 2060 2061 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2062 2063 PetscFunctionReturn(0); 2064 } 2065 EXTERN_C_END 2066 2067 EXTERN_C_BEGIN 2068 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2069 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2070 EXTERN_C_END 2071 2072 /*MC 2073 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2074 2075 Options Database Keys: 2076 . 
-mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2077 2078 Level: beginner 2079 2080 .seealso: MatCreateMPIBAIJ 2081 M*/ 2082 2083 EXTERN_C_BEGIN 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatCreate_MPIBAIJ" 2086 int MatCreate_MPIBAIJ(Mat B) 2087 { 2088 Mat_MPIBAIJ *b; 2089 int ierr; 2090 PetscTruth flg; 2091 2092 PetscFunctionBegin; 2093 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2094 B->data = (void*)b; 2095 2096 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2097 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2098 B->mapping = 0; 2099 B->factor = 0; 2100 B->assembled = PETSC_FALSE; 2101 2102 B->insertmode = NOT_SET_VALUES; 2103 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2104 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2105 2106 /* build local table of row and column ownerships */ 2107 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2108 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2109 b->cowners = b->rowners + b->size + 2; 2110 b->rowners_bs = b->cowners + b->size + 2; 2111 2112 /* build cache for off array entries formed */ 2113 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2114 b->donotstash = PETSC_FALSE; 2115 b->colmap = PETSC_NULL; 2116 b->garray = PETSC_NULL; 2117 b->roworiented = PETSC_TRUE; 2118 2119 #if defined(PETSC_USE_MAT_SINGLE) 2120 /* stuff for MatSetValues_XXX in single precision */ 2121 b->setvalueslen = 0; 2122 b->setvaluescopy = PETSC_NULL; 2123 #endif 2124 2125 /* stuff used in block assembly */ 2126 b->barray = 0; 2127 2128 /* stuff used for matrix vector multiply */ 2129 b->lvec = 0; 2130 b->Mvctx = 0; 2131 2132 /* stuff for MatGetRow() */ 2133 b->rowindices = 0; 2134 b->rowvalues = 0; 2135 b->getrowactive = PETSC_FALSE; 2136 2137 /* hash table stuff */ 2138 b->ht = 0; 2139 b->hd = 0; 2140 b->ht_size = 0; 2141 b->ht_flag = PETSC_FALSE; 2142 b->ht_fact = 0; 2143 b->ht_total_ct = 0; 2144 b->ht_insert_ct = 0; 2145 2146 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2147 if (flg) { 2148 PetscReal fact = 1.39; 2149 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2150 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2151 if (fact <= 1.0) fact = 1.39; 2152 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2153 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2154 } 2155 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2156 "MatStoreValues_MPIBAIJ", 2157 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2158 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2159 "MatRetrieveValues_MPIBAIJ", 2160 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2161 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2162 "MatGetDiagonalBlock_MPIBAIJ", 2163 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2164 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2165 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2166 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2167 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2168 "MatDiagonalScaleLocal_MPIBAIJ", 2169 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2170 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2171 "MatSetHashTableFactor_MPIBAIJ", 2172 
MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2173 PetscFunctionReturn(0); 2174 } 2175 EXTERN_C_END 2176 2177 /*MC 2178 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2179 2180 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2181 and MATMPIBAIJ otherwise. 2182 2183 Options Database Keys: 2184 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2185 2186 Level: beginner 2187 2188 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ 2189 M*/ 2190 2191 EXTERN_C_BEGIN 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatCreate_BAIJ" 2194 int MatCreate_BAIJ(Mat A) { 2195 int ierr,size; 2196 2197 PetscFunctionBegin; 2198 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2199 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2200 if (size == 1) { 2201 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2202 } else { 2203 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2204 } 2205 PetscFunctionReturn(0); 2206 } 2207 EXTERN_C_END 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2211 /*@C 2212 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2213 (block compressed row). For good matrix assembly performance 2214 the user should preallocate the matrix storage by setting the parameters 2215 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2216 performance can be increased by more than a factor of 50. 2217 2218 Collective on Mat 2219 2220 Input Parameters: 2221 + A - the matrix 2222 . bs - size of block 2223 . d_nz - number of block nonzeros per block row in diagonal portion of local 2224 submatrix (same for all local rows) 2225 . d_nnz - array containing the number of block nonzeros in the various block rows 2226 in the diagonal portion of the local submatrix (possibly different for each block 2227 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2228 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2229 submatrix (same for all local rows). 2230 - o_nnz - array containing the number of nonzeros in the various block rows of the 2231 off-diagonal portion of the local submatrix (possibly different for 2232 each block row) or PETSC_NULL. 2233 2234 2235 2236 2237 Options Database Keys: 2238 . -mat_no_unroll - uses code that does not unroll the loops in the 2239 block calculations (much slower) 2240 . -mat_block_size - size of the blocks to use 2241 2242 Notes: 2243 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2244 then it must be used on all processors that share the object for that argument. 2245 2246 Storage Information: 2247 For a square global matrix we define each processor's diagonal portion 2248 to be its local rows and the corresponding columns (a square submatrix); 2249 each processor's off-diagonal portion encompasses the remainder of the 2250 local matrix (a rectangular submatrix). 2251 2252 The user can specify preallocated storage for the diagonal part of 2253 the local submatrix with either d_nz or d_nnz (not both). Set 2254 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2255 memory allocation. Likewise, specify preallocated storage for the 2256 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2257 2258 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.
In 2259 the figure below we depict these three local rows and all columns (0-11). 2260 2261 .vb 2262 0 1 2 3 4 5 6 7 8 9 10 11 2263 ------------------- 2264 row 3 | o o o d d d o o o o o o 2265 row 4 | o o o d d d o o o o o o 2266 row 5 | o o o d d d o o o o o o 2267 ------------------- 2268 .ve 2269 2270 Thus, any entries in the d locations are stored in the d (diagonal) 2271 submatrix, and any entries in the o locations are stored in the 2272 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2273 stored simply in the MATSEQBAIJ format for compressed row storage. 2274 2275 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2276 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2277 In general, for PDE problems in which most nonzeros are near the diagonal, 2278 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2279 or you will get TERRIBLE performance; see the users' manual chapter on 2280 matrices. 2281 2282 Level: intermediate 2283 2284 .keywords: matrix, block, aij, compressed row, sparse, parallel 2285 2286 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2287 @*/ 2288 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2289 { 2290 int ierr,(*f)(Mat,int,int,const int[],int,const int[]); 2291 2292 PetscFunctionBegin; 2293 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2294 if (f) { 2295 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2296 } 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatCreateMPIBAIJ" 2302 /*@C 2303 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2304 (block compressed row). For good matrix assembly performance 2305 the user should preallocate the matrix storage by setting the parameters 2306 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2307 performance can be increased by more than a factor of 50. 2308 2309 Collective on MPI_Comm 2310 2311 Input Parameters: 2312 + comm - MPI communicator 2313 . bs - size of block 2314 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2315 This value should be the same as the local size used in creating the 2316 y vector for the matrix-vector product y = Ax. 2317 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 2318 This value should be the same as the local size used in creating the 2319 x vector for the matrix-vector product y = Ax. 2320 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2321 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2322 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2323 submatrix (same for all local rows) 2324 . d_nnz - array containing the number of nonzero blocks in the various block rows 2325 in the diagonal portion of the local submatrix (possibly different for each block 2326 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2327 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2328 submatrix (same for all local rows). 2329 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2330 off-diagonal portion of the local submatrix (possibly different for 2331 each block row) or PETSC_NULL.
2332 2333 Output Parameter: 2334 . A - the matrix 2335 2336 Options Database Keys: 2337 . -mat_no_unroll - uses code that does not unroll the loops in the 2338 block calculations (much slower) 2339 . -mat_block_size - size of the blocks to use 2340 2341 Notes: 2342 A nonzero block is any block that has 1 or more nonzeros in it. 2343 2344 The user MUST specify either the local or global matrix dimensions 2345 (possibly both). 2346 2347 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2348 then it must be used on all processors that share the object for that argument. 2349 2350 Storage Information: 2351 For a square global matrix we define each processor's diagonal portion 2352 to be its local rows and the corresponding columns (a square submatrix); 2353 each processor's off-diagonal portion encompasses the remainder of the 2354 local matrix (a rectangular submatrix). 2355 2356 The user can specify preallocated storage for the diagonal part of 2357 the local submatrix with either d_nz or d_nnz (not both). Set 2358 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2359 memory allocation. Likewise, specify preallocated storage for the 2360 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2361 2362 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2363 the figure below we depict these three local rows and all columns (0-11). 2364 2365 .vb 2366 0 1 2 3 4 5 6 7 8 9 10 11 2367 ------------------- 2368 row 3 | o o o d d d o o o o o o 2369 row 4 | o o o d d d o o o o o o 2370 row 5 | o o o d d d o o o o o o 2371 ------------------- 2372 .ve 2373 2374 Thus, any entries in the d locations are stored in the d (diagonal) 2375 submatrix, and any entries in the o locations are stored in the 2376 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2377 stored simply in the MATSEQBAIJ format for compressed row storage. 2378 2379 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2380 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2381 In general, for PDE problems in which most nonzeros are near the diagonal, 2382 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2383 or you will get TERRIBLE performance; see the users' manual chapter on 2384 matrices.
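Example usage:
The following is a minimal sketch (the global size 100, the preallocation counts, and the inserted block are illustrative assumptions only; PetscInitialize() has already been called and CHKERRQ() error handling is assumed). It creates a parallel BAIJ matrix with block size 2 and inserts a single 2x2 identity block with blocked insertion.

.vb
Mat         A;
int         ierr,row = 0,col = 0;
PetscScalar v[4] = {1.0,0.0,0.0,1.0};

ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                        5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
ierr = MatSetValuesBlocked(A,1,&row,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve

Here row and col are block (not point) indices and v holds one bs*bs = 2x2 block stored contiguously; in a real code each process would insert the block rows it owns.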
2385 2386 Level: intermediate 2387 2388 .keywords: matrix, block, aij, compressed row, sparse, parallel 2389 2390 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2391 @*/ 2392 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2393 { 2394 int ierr,size; 2395 2396 PetscFunctionBegin; 2397 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2398 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2399 if (size > 1) { 2400 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2401 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2402 } else { 2403 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2404 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2405 } 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2411 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2412 { 2413 Mat mat; 2414 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2415 int ierr,len=0; 2416 2417 PetscFunctionBegin; 2418 *newmat = 0; 2419 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2420 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2421 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2422 2423 mat->factor = matin->factor; 2424 mat->preallocated = PETSC_TRUE; 2425 mat->assembled = PETSC_TRUE; 2426 mat->insertmode = NOT_SET_VALUES; 2427 2428 a = (Mat_MPIBAIJ*)mat->data; 2429 a->bs = oldmat->bs; 2430 a->bs2 = oldmat->bs2; 2431 a->mbs = oldmat->mbs; 2432 a->nbs = oldmat->nbs; 2433 a->Mbs = oldmat->Mbs; 2434 a->Nbs = oldmat->Nbs; 2435 2436 a->rstart = oldmat->rstart; 2437 a->rend = oldmat->rend; 2438 a->cstart = oldmat->cstart; 2439 a->cend = oldmat->cend; 2440 a->size = oldmat->size; 2441 a->rank = oldmat->rank; 2442 a->donotstash = oldmat->donotstash; 2443 a->roworiented = oldmat->roworiented; 2444 a->rowindices = 0; 2445 a->rowvalues = 0; 2446 a->getrowactive = PETSC_FALSE; 2447 a->barray = 0; 2448 a->rstart_bs = oldmat->rstart_bs; 2449 a->rend_bs = oldmat->rend_bs; 2450 a->cstart_bs = oldmat->cstart_bs; 2451 a->cend_bs = oldmat->cend_bs; 2452 2453 /* hash table stuff */ 2454 a->ht = 0; 2455 a->hd = 0; 2456 a->ht_size = 0; 2457 a->ht_flag = oldmat->ht_flag; 2458 a->ht_fact = oldmat->ht_fact; 2459 a->ht_total_ct = 0; 2460 a->ht_insert_ct = 0; 2461 2462 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2463 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2464 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2465 if (oldmat->colmap) { 2466 #if defined (PETSC_USE_CTABLE) 2467 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2468 #else 2469 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2470 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2471 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2472 #endif 2473 } else a->colmap = 0; 2474 2475 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2476 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2477 PetscLogObjectMemory(mat,len*sizeof(int)); 2478 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2479 } else a->garray = 0; 2480 2481 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2482 PetscLogObjectParent(mat,a->lvec); 2483 ierr = 
VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2484 PetscLogObjectParent(mat,a->Mvctx); 2485 2486 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2487 PetscLogObjectParent(mat,a->A); 2488 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2489 PetscLogObjectParent(mat,a->B); 2490 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2491 *newmat = mat; 2492 2493 PetscFunctionReturn(0); 2494 } 2495 2496 #include "petscsys.h" 2497 2498 #undef __FUNCT__ 2499 #define __FUNCT__ "MatLoad_MPIBAIJ" 2500 int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2501 { 2502 Mat A; 2503 int i,nz,ierr,j,rstart,rend,fd; 2504 PetscScalar *vals,*buf; 2505 MPI_Comm comm = ((PetscObject)viewer)->comm; 2506 MPI_Status status; 2507 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2508 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2509 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2510 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2511 int dcount,kmax,k,nzcount,tmp; 2512 2513 PetscFunctionBegin; 2514 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2515 2516 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2517 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2518 if (!rank) { 2519 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2520 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2521 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2522 if (header[3] < 0) { 2523 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2524 } 2525 } 2526 2527 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2528 M = header[1]; N = header[2]; 2529 2530 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2531 2532 /* 2533 This code adds extra rows to make sure the number of rows is 2534 divisible by the blocksize 2535 */ 2536 Mbs = M/bs; 2537 extra_rows = bs - M + bs*(Mbs); 2538 if (extra_rows == bs) extra_rows = 0; 2539 else Mbs++; 2540 if (extra_rows &&!rank) { 2541 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2542 } 2543 2544 /* determine ownership of all rows */ 2545 mbs = Mbs/size + ((Mbs % size) > rank); 2546 m = mbs*bs; 2547 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2548 browners = rowners + size + 1; 2549 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2550 rowners[0] = 0; 2551 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2552 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2553 rstart = rowners[rank]; 2554 rend = rowners[rank+1]; 2555 2556 /* distribute row lengths to all processors */ 2557 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2558 if (!rank) { 2559 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2560 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2561 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2562 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2563 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2564 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2565 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2566 } else { 2567 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2568 } 2569 2570 
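/* At this point every process knows the point row lengths locrowlens[] for the rows it owns; next, process 0 reads the column indices from the file and ships each process its portion */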
if (!rank) { 2571 /* calculate the number of nonzeros on each processor */ 2572 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2573 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2574 for (i=0; i<size; i++) { 2575 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2576 procsnz[i] += rowlengths[j]; 2577 } 2578 } 2579 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2580 2581 /* determine max buffer needed and allocate it */ 2582 maxnz = 0; 2583 for (i=0; i<size; i++) { 2584 maxnz = PetscMax(maxnz,procsnz[i]); 2585 } 2586 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2587 2588 /* read in my part of the matrix column indices */ 2589 nz = procsnz[0]; 2590 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2591 mycols = ibuf; 2592 if (size == 1) nz -= extra_rows; 2593 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2594 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2595 2596 /* read in the column indices for all other processors (except the last) and ship them off */ 2597 for (i=1; i<size-1; i++) { 2598 nz = procsnz[i]; 2599 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2600 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2601 } 2602 /* read in the stuff for the last proc */ 2603 if (size != 1) { 2604 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2605 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2606 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2607 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2608 } 2609 ierr = PetscFree(cols);CHKERRQ(ierr); 2610 } else { 2611 /* determine buffer space needed for message */ 2612 nz = 0; 2613 for (i=0; i<m; i++) { 2614 nz += locrowlens[i]; 2615 } 2616 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2617 mycols = ibuf; 2618 /* receive message of column indices*/ 2619 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2620 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2621 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2622 } 2623 2624 /* loop over local rows, determining number of off diagonal entries */ 2625 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2626 odlens = dlens + (rend-rstart); 2627 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2628 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2629 masked1 = mask + Mbs; 2630 masked2 = masked1 + Mbs; 2631 rowcount = 0; nzcount = 0; 2632 for (i=0; i<mbs; i++) { 2633 dcount = 0; 2634 odcount = 0; 2635 for (j=0; j<bs; j++) { 2636 kmax = locrowlens[rowcount]; 2637 for (k=0; k<kmax; k++) { 2638 tmp = mycols[nzcount++]/bs; 2639 if (!mask[tmp]) { 2640 mask[tmp] = 1; 2641 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2642 else masked1[dcount++] = tmp; 2643 } 2644 } 2645 rowcount++; 2646 } 2647 2648 dlens[i] = dcount; 2649 odlens[i] = odcount; 2650 2651 /* zero out the mask elements we set */ 2652 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2653 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2654 } 2655 2656 /* create our matrix */ 2657 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2658 ierr = MatSetType(A,type);CHKERRQ(ierr); 2659 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2660 2661 /* Why isn't this called as ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); ? */ 2662 MatSetOption(A,MAT_COLUMNS_SORTED); 2663 2664 if (!rank) { 2665 ierr =
PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2666 /* read in my part of the matrix numerical values */ 2667 nz = procsnz[0]; 2668 vals = buf; 2669 mycols = ibuf; 2670 if (size == 1) nz -= extra_rows; 2671 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2672 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2673 2674 /* insert into matrix */ 2675 jj = rstart*bs; 2676 for (i=0; i<m; i++) { 2677 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2678 mycols += locrowlens[i]; 2679 vals += locrowlens[i]; 2680 jj++; 2681 } 2682 /* read in other processors (except the last one) and ship out */ 2683 for (i=1; i<size-1; i++) { 2684 nz = procsnz[i]; 2685 vals = buf; 2686 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2687 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2688 } 2689 /* the last proc */ 2690 if (size != 1) { 2691 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2692 vals = buf; 2693 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2694 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2695 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2696 } 2697 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2698 } else { 2699 /* receive numeric values */ 2700 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2701 2702 /* receive message of values*/ 2703 vals = buf; 2704 mycols = ibuf; 2705 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2706 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2707 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file"); 2708 2709 /* insert into matrix */ 2710 jj = rstart*bs; 2711 for (i=0; i<m; i++) { 2712 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2713 mycols += locrowlens[i]; 2714 vals += locrowlens[i]; 2715 jj++; 2716 } 2717 } 2718 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2719 ierr = PetscFree(buf);CHKERRQ(ierr); 2720 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2721 ierr = PetscFree(rowners);CHKERRQ(ierr); 2722 ierr = PetscFree(dlens);CHKERRQ(ierr); 2723 ierr = PetscFree(mask);CHKERRQ(ierr); 2724 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2725 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2726 2727 *newmat = A; 2728 PetscFunctionReturn(0); 2729 } 2730 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2733 /*@ 2734 MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table. 2735 2736 Input Parameters: 2737 + mat - the matrix 2738 -
fact - factor used in computing the hash table size (the default is 1.39) 2739 2740 Collective on Mat 2741 2742 Level: advanced 2743 2744 Notes: 2745 This can also be set by the command line option: -mat_use_hash_table fact 2746 2747 .keywords: matrix, hashtable, factor, HT 2748 2749 .seealso: MatSetOption() 2750 @*/ 2751 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2752 { 2753 int ierr,(*f)(Mat,PetscReal); 2754 2755 PetscFunctionBegin; 2756 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2757 if (f) { 2758 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2759 } 2760 PetscFunctionReturn(0); 2761 } 2762 2763 #undef __FUNCT__ 2764 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2765 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2766 { 2767 Mat_MPIBAIJ *baij; 2768 2769 PetscFunctionBegin; 2770 baij = (Mat_MPIBAIJ*)mat->data; 2771 baij->ht_fact = fact; 2772 PetscFunctionReturn(0); 2773 } 2774 2775 #undef __FUNCT__ 2776 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2777 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2778 { 2779 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2780 PetscFunctionBegin; 2781 *Ad = a->A; 2782 *Ao = a->B; 2783 *colmap = a->garray; 2784 PetscFunctionReturn(0); 2785 } 2786
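/*
   Example usage of MatMPIBAIJGetSeqBAIJ() -- a sketch, assuming A is an
   already assembled MATMPIBAIJ matrix and ierr is declared as above:

.vb
   Mat  Ad,Ao;
   int  *garray;
   ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
.ve

   Ad is the local diagonal block and Ao the local off-diagonal block (both
   stored in MATSEQBAIJ format); garray[i] gives the global block column
   corresponding to local block column i of Ao. The routine returns internal
   pointers, so the caller must not destroy the returned objects.
*/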