#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*
     UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix entries in single precision and
   inserts them into the single-precision data structures. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to put them
   into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
   For example, with bs = 2 and garray[] = {3,7}, colmap[3] = 1 and
   colmap[7] = 3: the stored values are i*bs+1, i.e. 1-based so that a
   value of 0 means the block column is not present on this process.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
#define CHUNKSIZE  10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
}
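/*
   Illustrative note (not in the original source): both macros above store each
   logically dense bs x bs block in column-major order, so a point entry lands at
   offset bs2*_i + bs*cidx + ridx within the _i-th stored block of its block row.
   For example, with bs = 2 (bs2 = 4), the entry at local position (ridx = 1,
   cidx = 0) of the third stored block (_i = 2) sits at offset 4*2 + 2*0 + 1 = 9.
*/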
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
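/*
   Usage sketch (illustrative only, not part of the original source): callers always
   pass global point indices; entries whose row lies outside [rmap.rstart,rmap.rend)
   are stashed by the routine above and communicated during assembly, e.g.

       PetscScalar one = 1.0;
       ierr = MatSetValues(mat,1,&grow,1,&gcol,&one,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   where grow/gcol are hypothetical global row/column indices.
*/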
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
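/*
   Illustrative note (not in the original source): HASH() is multiplicative hashing
   with the golden-ratio constant. For example, with size = 1000 and key = 7:
   7*0.6180339887 = 4.3262..., fractional part 0.3262..., so
   HASH(1000,7,tmp) = (PetscInt)(1000*0.3262...) = 326. Collisions are resolved by
   the linear probing loops in the routines below.
*/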
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,ii,jj,row,col;
  PetscInt       rstart=baij->rstartbs;
  PetscInt       rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
  PetscInt       h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
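/*
   Illustrative note (not in the original source): in the blocked hash-table path
   above, the key of a block entry is its global block position, key = row*Nbs+col+1
   (the +1 keeps 0 free to mean "empty slot"). For example, with Nbs = 10 block
   columns, block (row 2, col 3) gets key 2*10+3+1 = 24, and HD[idx] then points
   directly at the bs*bs values of that block in the local A or B part.
*/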
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets its entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (PetscInt)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /*
       Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags.
    */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
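/*
   Usage sketch (illustrative only, not part of the original source): once a matrix
   has been assembled as above, the viewer routines below are reached through the
   generic interface, e.g.

       ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   which dispatches to MatView_MPIBAIJ() for an MPIBAIJ matrix A.
*/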
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
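/*
   Illustrative note (not in the original source): the two routines above compute
   y = A_d*x + B_o*x_ghost, where A_d is the local diagonal block and B_o the
   off-diagonal block acting on ghost values gathered into a->lvec. The scatter is
   started before the diagonal-block product and finished just before the
   off-diagonal product, so communication overlaps local computation:

       VecScatterBegin(xx,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);
       (diagonal-block product)
       VecScatterEnd(xx,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);
       (off-diagonal-block product added into the result)
*/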
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row. */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap.bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)A->rmap.N;
  info->columns_global    = (PetscReal)A->cmap.N;
  info->rows_local        = (PetscReal)A->rmap.N;
  info->columns_local     = (PetscReal)A->cmap.N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
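/*
   Usage sketch (illustrative only, not part of the original source): the hash-table
   insertion path earlier in this file is enabled per matrix with

       ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);

   which sets ht_flag above; the table itself is built at the first
   MAT_FINAL_ASSEMBLY in MatAssemblyEnd_MPIBAIJ(), after which MatSetValues() and
   MatSetValuesBlocked() are redirected to the *_HT kernels.
*/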
*Aloc; 1596 Mat B; 1597 PetscErrorCode ierr; 1598 PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1599 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1600 MatScalar *a; 1601 1602 PetscFunctionBegin; 1603 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1604 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1605 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1606 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1607 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1608 1609 /* copy over the A part */ 1610 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1611 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1612 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1613 1614 for (i=0; i<mbs; i++) { 1615 rvals[0] = bs*(baij->rstartbs + i); 1616 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1617 for (j=ai[i]; j<ai[i+1]; j++) { 1618 col = (baij->cstartbs+aj[j])*bs; 1619 for (k=0; k<bs; k++) { 1620 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1621 col++; a += bs; 1622 } 1623 } 1624 } 1625 /* copy over the B part */ 1626 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1627 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1628 for (i=0; i<mbs; i++) { 1629 rvals[0] = bs*(baij->rstartbs + i); 1630 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1631 for (j=ai[i]; j<ai[i+1]; j++) { 1632 col = baij->garray[aj[j]]*bs; 1633 for (k=0; k<bs; k++) { 1634 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1635 col++; a += bs; 1636 } 1637 } 1638 } 1639 ierr = PetscFree(rvals);CHKERRQ(ierr); 1640 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 1643 if (matout) { 1644 *matout = B; 1645 } else { 1646 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1653 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1654 { 1655 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1656 Mat a = baij->A,b = baij->B; 1657 PetscErrorCode ierr; 1658 PetscInt s1,s2,s3; 1659 1660 PetscFunctionBegin; 1661 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1662 if (rr) { 1663 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1664 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1665 /* Overlap communication with computation. 
*/ 1666 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1667 } 1668 if (ll) { 1669 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1670 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1671 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1672 } 1673 /* scale the diagonal block */ 1674 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1675 1676 if (rr) { 1677 /* Do a scatter end and then right scale the off-diagonal block */ 1678 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1679 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1680 } 1681 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1687 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1688 { 1689 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1690 PetscErrorCode ierr; 1691 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1692 PetscInt i,*owners = A->rmap.range; 1693 PetscInt *nprocs,j,idx,nsends,row; 1694 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1695 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1696 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1697 MPI_Comm comm = A->comm; 1698 MPI_Request *send_waits,*recv_waits; 1699 MPI_Status recv_status,*send_status; 1700 #if defined(PETSC_DEBUG) 1701 PetscTruth found = PETSC_FALSE; 1702 #endif 1703 1704 PetscFunctionBegin; 1705 /* first count number of contributors to each processor */ 1706 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1707 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1708 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1709 j = 0; 1710 for (i=0; i<N; i++) { 1711 if (lastidx > (idx = rows[i])) j = 0; 1712 lastidx = idx; 1713 for (; j<size; j++) { 1714 if (idx >= owners[j] && idx < owners[j+1]) { 1715 nprocs[2*j]++; 1716 nprocs[2*j+1] = 1; 1717 owner[i] = j; 1718 #if defined(PETSC_DEBUG) 1719 found = PETSC_TRUE; 1720 #endif 1721 break; 1722 } 1723 } 1724 #if defined(PETSC_DEBUG) 1725 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1726 found = PETSC_FALSE; 1727 #endif 1728 } 1729 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1730 1731 /* inform other processors of number of messages and max length*/ 1732 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1733 1734 /* post receives: */ 1735 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1736 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1737 for (i=0; i<nrecvs; i++) { 1738 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1739 } 1740 1741 /* do sends: 1742 1) starts[i] gives the starting index in svalues for stuff going to 1743 the ith processor 1744 */ 1745 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1746 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1747 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1748 starts[0] = 0; 1749 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1750 for (i=0; i<N; i++) { 1751 svalues[starts[owner[i]]++] = rows[i]; 1752 } 1753 1754 starts[0] = 0; 1755 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1756 count = 0; 1757 for (i=0; i<size; i++) { 1758 if 
(nprocs[2*i+1]) { 1759 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1760 } 1761 } 1762 ierr = PetscFree(starts);CHKERRQ(ierr); 1763 1764 base = owners[rank]; 1765 1766 /* wait on receives */ 1767 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1768 source = lens + nrecvs; 1769 count = nrecvs; slen = 0; 1770 while (count) { 1771 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1772 /* unpack receives into our local space */ 1773 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1774 source[imdex] = recv_status.MPI_SOURCE; 1775 lens[imdex] = n; 1776 slen += n; 1777 count--; 1778 } 1779 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1780 1781 /* move the data into the send scatter */ 1782 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1783 count = 0; 1784 for (i=0; i<nrecvs; i++) { 1785 values = rvalues + i*nmax; 1786 for (j=0; j<lens[i]; j++) { 1787 lrows[count++] = values[j] - base; 1788 } 1789 } 1790 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1791 ierr = PetscFree(lens);CHKERRQ(ierr); 1792 ierr = PetscFree(owner);CHKERRQ(ierr); 1793 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1794 1795 /* actually zap the local rows */ 1796 /* 1797 Zero the required rows. If the "diagonal block" of the matrix 1798 is square and the user wishes to set the diagonal we use separate 1799 code so that MatSetValues() is not called for each diagonal allocating 1800 new memory, thus calling lots of mallocs and slowing things down. 1801 1802 Contributed by: Matthew Knepley 1803 */ 1804 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1805 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1806 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1807 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1808 } else if (diag != 0.0) { 1809 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1810 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1811 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1812 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1813 } 1814 for (i=0; i<slen; i++) { 1815 row = lrows[i] + rstart_bs; 1816 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1817 } 1818 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820 } else { 1821 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1822 } 1823 1824 ierr = PetscFree(lrows);CHKERRQ(ierr); 1825 1826 /* wait on sends */ 1827 if (nsends) { 1828 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1829 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1830 ierr = PetscFree(send_status);CHKERRQ(ierr); 1831 } 1832 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1833 ierr = PetscFree(svalues);CHKERRQ(ierr); 1834 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1840 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1841 { 1842 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1843 PetscErrorCode ierr; 1844 1845 PetscFunctionBegin; 1846 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1851 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatEqual_MPIBAIJ" 1854 PetscErrorCode 
MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1855 { 1856 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1857 Mat a,b,c,d; 1858 PetscTruth flg; 1859 PetscErrorCode ierr; 1860 1861 PetscFunctionBegin; 1862 a = matA->A; b = matA->B; 1863 c = matB->A; d = matB->B; 1864 1865 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1866 if (flg) { 1867 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1868 } 1869 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatCopy_MPIBAIJ" 1875 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1876 { 1877 PetscErrorCode ierr; 1878 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1879 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1880 1881 PetscFunctionBegin; 1882 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1883 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1884 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1885 } else { 1886 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1887 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1888 } 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1894 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1895 { 1896 PetscErrorCode ierr; 1897 1898 PetscFunctionBegin; 1899 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1900 PetscFunctionReturn(0); 1901 } 1902 1903 #include "petscblaslapack.h" 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1906 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1907 { 1908 PetscErrorCode ierr; 1909 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1910 PetscBLASInt bnz,one=1; 1911 Mat_SeqBAIJ *x,*y; 1912 1913 PetscFunctionBegin; 1914 if (str == SAME_NONZERO_PATTERN) { 1915 PetscScalar alpha = a; 1916 x = (Mat_SeqBAIJ *)xx->A->data; 1917 y = (Mat_SeqBAIJ *)yy->A->data; 1918 bnz = (PetscBLASInt)x->nz; 1919 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1920 x = (Mat_SeqBAIJ *)xx->B->data; 1921 y = (Mat_SeqBAIJ *)yy->B->data; 1922 bnz = (PetscBLASInt)x->nz; 1923 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1924 } else { 1925 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1926 } 1927 PetscFunctionReturn(0); 1928 } 1929 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1932 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1933 { 1934 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1935 PetscErrorCode ierr; 1936 1937 PetscFunctionBegin; 1938 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1939 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1945 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1946 { 1947 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1948 PetscErrorCode ierr; 1949 1950 PetscFunctionBegin; 1951 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1952 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 /* -------------------------------------------------------------------*/ 1957 static struct _MatOps MatOps_Values = { 1958 MatSetValues_MPIBAIJ, 1959 MatGetRow_MPIBAIJ, 1960 MatRestoreRow_MPIBAIJ, 1961 MatMult_MPIBAIJ, 1962 /* 4*/ MatMultAdd_MPIBAIJ, 1963 MatMultTranspose_MPIBAIJ, 1964 MatMultTransposeAdd_MPIBAIJ, 1965 0, 1966 0, 1967 0, 1968 /*10*/ 0, 1969 0, 1970 0, 1971 0, 1972 MatTranspose_MPIBAIJ, 
1973 /*15*/ MatGetInfo_MPIBAIJ, 1974 MatEqual_MPIBAIJ, 1975 MatGetDiagonal_MPIBAIJ, 1976 MatDiagonalScale_MPIBAIJ, 1977 MatNorm_MPIBAIJ, 1978 /*20*/ MatAssemblyBegin_MPIBAIJ, 1979 MatAssemblyEnd_MPIBAIJ, 1980 0, 1981 MatSetOption_MPIBAIJ, 1982 MatZeroEntries_MPIBAIJ, 1983 /*25*/ MatZeroRows_MPIBAIJ, 1984 0, 1985 0, 1986 0, 1987 0, 1988 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1989 0, 1990 0, 1991 0, 1992 0, 1993 /*35*/ MatDuplicate_MPIBAIJ, 1994 0, 1995 0, 1996 0, 1997 0, 1998 /*40*/ MatAXPY_MPIBAIJ, 1999 MatGetSubMatrices_MPIBAIJ, 2000 MatIncreaseOverlap_MPIBAIJ, 2001 MatGetValues_MPIBAIJ, 2002 MatCopy_MPIBAIJ, 2003 /*45*/ 0, 2004 MatScale_MPIBAIJ, 2005 0, 2006 0, 2007 0, 2008 /*50*/ 0, 2009 0, 2010 0, 2011 0, 2012 0, 2013 /*55*/ 0, 2014 0, 2015 MatSetUnfactored_MPIBAIJ, 2016 0, 2017 MatSetValuesBlocked_MPIBAIJ, 2018 /*60*/ 0, 2019 MatDestroy_MPIBAIJ, 2020 MatView_MPIBAIJ, 2021 0, 2022 0, 2023 /*65*/ 0, 2024 0, 2025 0, 2026 0, 2027 0, 2028 /*70*/ MatGetRowMax_MPIBAIJ, 2029 0, 2030 0, 2031 0, 2032 0, 2033 /*75*/ 0, 2034 0, 2035 0, 2036 0, 2037 0, 2038 /*80*/ 0, 2039 0, 2040 0, 2041 0, 2042 MatLoad_MPIBAIJ, 2043 /*85*/ 0, 2044 0, 2045 0, 2046 0, 2047 0, 2048 /*90*/ 0, 2049 0, 2050 0, 2051 0, 2052 0, 2053 /*95*/ 0, 2054 0, 2055 0, 2056 0, 2057 0, 2058 /*100*/0, 2059 0, 2060 0, 2061 0, 2062 0, 2063 /*105*/0, 2064 MatRealPart_MPIBAIJ, 2065 MatImaginaryPart_MPIBAIJ}; 2066 2067 2068 EXTERN_C_BEGIN 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2071 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2072 { 2073 PetscFunctionBegin; 2074 *a = ((Mat_MPIBAIJ *)A->data)->A; 2075 *iscopy = PETSC_FALSE; 2076 PetscFunctionReturn(0); 2077 } 2078 EXTERN_C_END 2079 2080 EXTERN_C_BEGIN 2081 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2082 EXTERN_C_END 2083 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2086 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2087 { 2088 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2089 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2090 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2091 const PetscInt *JJ; 2092 PetscScalar *values; 2093 PetscErrorCode ierr; 2094 2095 PetscFunctionBegin; 2096 #if defined(PETSC_OPT_g) 2097 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2098 #endif 2099 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2100 o_nnz = d_nnz + m; 2101 2102 for (i=0; i<m; i++) { 2103 nnz = Ii[i+1]- Ii[i]; 2104 JJ = J + Ii[i]; 2105 nnz_max = PetscMax(nnz_max,nnz); 2106 #if defined(PETSC_OPT_g) 2107 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2108 #endif 2109 for (j=0; j<nnz; j++) { 2110 if (*JJ >= cstart) break; 2111 JJ++; 2112 } 2113 d = 0; 2114 for (; j<nnz; j++) { 2115 if (*JJ++ >= cend) break; 2116 d++; 2117 } 2118 d_nnz[i] = d; 2119 o_nnz[i] = nnz - d; 2120 } 2121 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2122 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2123 2124 if (v) values = (PetscScalar*)v; 2125 else { 2126 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2127 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2128 } 2129 2130 ierr = 
MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ (BAIJ)
   format (block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap.bs = bs;
  B->cmap.bs = bs;
  ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }

  b =
(Mat_MPIBAIJ*)B->data; 2216 b->bs2 = bs*bs; 2217 b->mbs = B->rmap.n/bs; 2218 b->nbs = B->cmap.n/bs; 2219 b->Mbs = B->rmap.N/bs; 2220 b->Nbs = B->cmap.N/bs; 2221 2222 for (i=0; i<=b->size; i++) { 2223 b->rangebs[i] = B->rmap.range[i]/bs; 2224 } 2225 b->rstartbs = B->rmap.rstart/bs; 2226 b->rendbs = B->rmap.rend/bs; 2227 b->cstartbs = B->cmap.rstart/bs; 2228 b->cendbs = B->cmap.rend/bs; 2229 2230 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2231 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2232 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2233 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2234 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2235 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2236 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2237 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2238 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2239 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2240 2241 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2242 2243 PetscFunctionReturn(0); 2244 } 2245 EXTERN_C_END 2246 2247 EXTERN_C_BEGIN 2248 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2249 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2250 EXTERN_C_END 2251 2252 /*MC 2253 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2254 2255 Options Database Keys: 2256 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2257 . -mat_block_size <bs> - set the blocksize used to store the matrix 2258 - -mat_use_hash_table <fact> 2259 2260 Level: beginner 2261 2262 .seealso: MatCreateMPIBAIJ 2263 M*/ 2264 2265 EXTERN_C_BEGIN 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatCreate_MPIBAIJ" 2268 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2269 { 2270 Mat_MPIBAIJ *b; 2271 PetscErrorCode ierr; 2272 PetscTruth flg; 2273 2274 PetscFunctionBegin; 2275 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2276 B->data = (void*)b; 2277 2278 2279 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2280 B->mapping = 0; 2281 B->factor = 0; 2282 B->assembled = PETSC_FALSE; 2283 2284 B->insertmode = NOT_SET_VALUES; 2285 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2286 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2287 2288 /* build local table of row and column ownerships */ 2289 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2290 2291 /* build cache for off array entries formed */ 2292 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2293 b->donotstash = PETSC_FALSE; 2294 b->colmap = PETSC_NULL; 2295 b->garray = PETSC_NULL; 2296 b->roworiented = PETSC_TRUE; 2297 2298 #if defined(PETSC_USE_MAT_SINGLE) 2299 /* stuff for MatSetValues_XXX in single precision */ 2300 b->setvalueslen = 0; 2301 b->setvaluescopy = PETSC_NULL; 2302 #endif 2303 2304 /* stuff used in block assembly */ 2305 b->barray = 0; 2306 2307 /* stuff used for matrix vector multiply */ 2308 b->lvec = 0; 2309 b->Mvctx = 0; 2310 2311 /* stuff for MatGetRow() */ 2312 b->rowindices = 0; 2313 b->rowvalues = 0; 2314 b->getrowactive = PETSC_FALSE; 2315 2316 /* hash table stuff */ 2317 b->ht = 0; 2318 b->hd = 0; 2319 b->ht_size = 0; 2320 b->ht_flag = PETSC_FALSE; 2321 b->ht_fact = 0; 2322 b->ht_total_ct = 0; 2323 b->ht_insert_ct = 0; 2324 
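  /*
     Note (added explanation, not from the original source): the options block below is where
     the hash-table assembly path is selected.  As an illustrative placeholder, a run with

         -mat_type mpibaij -mat_use_hash_table 1.6

     (the value 1.6 is an arbitrary example, following the "-mat_use_hash_table <fact>" form
     documented in the MATMPIBAIJ manual page above) makes this block call
     MatSetOption(B,MAT_USE_HASH_TABLE) and then MatMPIBAIJSetHashTableFactor(B,fact);
     factors <= 1.0 are reset to the default 1.39.
  */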
  ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIBAIJ",
                                           MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIBAIJ",
                                           MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIBAIJ",
                                           MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                           "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                           MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                           "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                           MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIBAIJ",
                                           MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                           "MatSetHashTableFactor_MPIBAIJ",
                                           MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_BAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of the blocks
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during matrix assembly; fact is the factor used to size the table

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
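
   Example:
   A minimal calling sketch (added here for illustration; the block size 3 and the
   counts 5 and 2 are placeholders, not recommendations, and m,n,M,N are the usual
   local and global sizes):

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,3,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve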

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of the blocks
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during matrix assembly; fact is the factor used to size the table

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
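
   As a minimal calling sketch matching this decomposition (added for illustration;
   the block size 2 and the counts 5 and 1 below are placeholders only):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve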
2541 2542 The user can specify preallocated storage for the diagonal part of 2543 the local submatrix with either d_nz or d_nnz (not both). Set 2544 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2545 memory allocation. Likewise, specify preallocated storage for the 2546 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2547 2548 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2549 the figure below we depict these three local rows and all columns (0-11). 2550 2551 .vb 2552 0 1 2 3 4 5 6 7 8 9 10 11 2553 ------------------- 2554 row 3 | o o o d d d o o o o o o 2555 row 4 | o o o d d d o o o o o o 2556 row 5 | o o o d d d o o o o o o 2557 ------------------- 2558 .ve 2559 2560 Thus, any entries in the d locations are stored in the d (diagonal) 2561 submatrix, and any entries in the o locations are stored in the 2562 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2563 stored simply in the MATSEQBAIJ format for compressed row storage. 2564 2565 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2566 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2567 In general, for PDE problems in which most nonzeros are near the diagonal, 2568 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2569 or you will get TERRIBLE performance; see the users' manual chapter on 2570 matrices. 2571 2572 Level: intermediate 2573 2574 .keywords: matrix, block, aij, compressed row, sparse, parallel 2575 2576 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2577 @*/ 2578 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2579 { 2580 PetscErrorCode ierr; 2581 PetscMPIInt size; 2582 2583 PetscFunctionBegin; 2584 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2585 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2586 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2587 if (size > 1) { 2588 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2589 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2590 } else { 2591 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2592 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2593 } 2594 PetscFunctionReturn(0); 2595 } 2596 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2599 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2600 { 2601 Mat mat; 2602 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2603 PetscErrorCode ierr; 2604 PetscInt len=0; 2605 2606 PetscFunctionBegin; 2607 *newmat = 0; 2608 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2609 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2610 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2611 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2612 2613 mat->factor = matin->factor; 2614 mat->preallocated = PETSC_TRUE; 2615 mat->assembled = PETSC_TRUE; 2616 mat->insertmode = NOT_SET_VALUES; 2617 2618 a = (Mat_MPIBAIJ*)mat->data; 2619 mat->rmap.bs = matin->rmap.bs; 2620 a->bs2 = oldmat->bs2; 2621 a->mbs = oldmat->mbs; 2622 a->nbs = oldmat->nbs; 2623 a->Mbs = oldmat->Mbs; 2624 a->Nbs = oldmat->Nbs; 2625 2626 ierr = 
PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2627 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2628 2629 a->size = oldmat->size; 2630 a->rank = oldmat->rank; 2631 a->donotstash = oldmat->donotstash; 2632 a->roworiented = oldmat->roworiented; 2633 a->rowindices = 0; 2634 a->rowvalues = 0; 2635 a->getrowactive = PETSC_FALSE; 2636 a->barray = 0; 2637 a->rstartbs = oldmat->rstartbs; 2638 a->rendbs = oldmat->rendbs; 2639 a->cstartbs = oldmat->cstartbs; 2640 a->cendbs = oldmat->cendbs; 2641 2642 /* hash table stuff */ 2643 a->ht = 0; 2644 a->hd = 0; 2645 a->ht_size = 0; 2646 a->ht_flag = oldmat->ht_flag; 2647 a->ht_fact = oldmat->ht_fact; 2648 a->ht_total_ct = 0; 2649 a->ht_insert_ct = 0; 2650 2651 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2652 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2653 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2654 if (oldmat->colmap) { 2655 #if defined (PETSC_USE_CTABLE) 2656 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2657 #else 2658 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2659 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2660 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2661 #endif 2662 } else a->colmap = 0; 2663 2664 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2665 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2666 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2667 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2668 } else a->garray = 0; 2669 2670 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2671 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2672 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2673 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2674 2675 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2676 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2677 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2678 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2679 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2680 *newmat = mat; 2681 2682 PetscFunctionReturn(0); 2683 } 2684 2685 #include "petscsys.h" 2686 2687 #undef __FUNCT__ 2688 #define __FUNCT__ "MatLoad_MPIBAIJ" 2689 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2690 { 2691 Mat A; 2692 PetscErrorCode ierr; 2693 int fd; 2694 PetscInt i,nz,j,rstart,rend; 2695 PetscScalar *vals,*buf; 2696 MPI_Comm comm = ((PetscObject)viewer)->comm; 2697 MPI_Status status; 2698 PetscMPIInt rank,size,maxnz; 2699 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2700 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2701 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2702 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2703 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2704 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2705 2706 PetscFunctionBegin; 2707 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2708 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 
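  /*
     Usage note (added explanation, not from the original source): the block size used while
     loading is taken from the -matload_block_size option read just above.  An illustrative
     run line (launcher and program name are placeholders) would be

         mpiexec -n 4 ./myapp -matload_block_size 3

     which loads the binary matrix with bs = 3; if the stored row count is not divisible by
     bs, the loader pads the matrix as described below.
  */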
2709 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2710 2711 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2712 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2713 if (!rank) { 2714 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2715 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2716 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2717 } 2718 2719 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2720 M = header[1]; N = header[2]; 2721 2722 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2723 2724 /* 2725 This code adds extra rows to make sure the number of rows is 2726 divisible by the blocksize 2727 */ 2728 Mbs = M/bs; 2729 extra_rows = bs - M + bs*Mbs; 2730 if (extra_rows == bs) extra_rows = 0; 2731 else Mbs++; 2732 if (extra_rows && !rank) { 2733 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2734 } 2735 2736 /* determine ownership of all rows */ 2737 mbs = Mbs/size + ((Mbs % size) > rank); 2738 m = mbs*bs; 2739 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2740 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2741 2742 /* process 0 needs enough room for process with most rows */ 2743 if (!rank) { 2744 mmax = rowners[1]; 2745 for (i=2; i<size; i++) { 2746 mmax = PetscMax(mmax,rowners[i]); 2747 } 2748 mmax*=bs; 2749 } else mmax = m; 2750 2751 rowners[0] = 0; 2752 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2753 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2754 rstart = rowners[rank]; 2755 rend = rowners[rank+1]; 2756 2757 /* distribute row lengths to all processors */ 2758 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2759 if (!rank) { 2760 mend = m; 2761 if (size == 1) mend = mend - extra_rows; 2762 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2763 for (j=mend; j<m; j++) locrowlens[j] = 1; 2764 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2765 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2766 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2767 for (j=0; j<m; j++) { 2768 procsnz[0] += locrowlens[j]; 2769 } 2770 for (i=1; i<size; i++) { 2771 mend = browners[i+1] - browners[i]; 2772 if (i == size-1) mend = mend - extra_rows; 2773 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2774 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2775 /* calculate the number of nonzeros on each processor */ 2776 for (j=0; j<browners[i+1]-browners[i]; j++) { 2777 procsnz[i] += rowlengths[j]; 2778 } 2779 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2780 } 2781 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2782 } else { 2783 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2784 } 2785 2786 if (!rank) { 2787 /* determine max buffer needed and allocate it */ 2788 maxnz = procsnz[0]; 2789 for (i=1; i<size; i++) { 2790 maxnz = PetscMax(maxnz,procsnz[i]); 2791 } 2792 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2793 2794 /* read in my part of the matrix column indices */ 2795 nz = procsnz[0]; 2796 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2797 mycols = ibuf; 2798 if (size == 1) nz -= extra_rows; 2799 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2800 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 
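    /*
       Worked example of the padding arithmetic above (the numbers are illustrative only,
       not from the original source): with M = 10 and bs = 4 one gets Mbs = 10/4 = 2 and
       extra_rows = bs - M + bs*Mbs = 4 - 10 + 8 = 2, so Mbs is bumped to 3 and the padded
       matrix has 12 rows.  Each padding row receives a single entry on the diagonal:
       column M+i (appended just above) with value 1.0 (filled in when the numerical
       values are read in below).
    */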

    /* read in the column indices for the other processors (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  /* Why isn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values for the other processors (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr =
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2893 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2894 } 2895 /* the last proc */ 2896 if (size != 1){ 2897 nz = procsnz[i] - extra_rows; 2898 vals = buf; 2899 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2900 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2901 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2902 } 2903 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2904 } else { 2905 /* receive numeric values */ 2906 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2907 2908 /* receive message of values*/ 2909 vals = buf; 2910 mycols = ibuf; 2911 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2912 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2913 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2914 2915 /* insert into matrix */ 2916 jj = rstart*bs; 2917 for (i=0; i<m; i++) { 2918 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2919 mycols += locrowlens[i]; 2920 vals += locrowlens[i]; 2921 jj++; 2922 } 2923 } 2924 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2925 ierr = PetscFree(buf);CHKERRQ(ierr); 2926 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2927 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2928 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2929 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2930 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2931 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2932 2933 *newmat = A; 2934 PetscFunctionReturn(0); 2935 } 2936 2937 #undef __FUNCT__ 2938 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2939 /*@ 2940 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2941 2942 Input Parameters: 2943 . mat - the matrix 2944 . fact - factor 2945 2946 Collective on Mat 2947 2948 Level: advanced 2949 2950 Notes: 2951 This can also be set by the command line option: -mat_use_hash_table <fact> 2952 2953 .keywords: matrix, hashtable, factor, HT 2954 2955 .seealso: MatSetOption() 2956 @*/ 2957 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2958 { 2959 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2960 2961 PetscFunctionBegin; 2962 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2963 if (f) { 2964 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2965 } 2966 PetscFunctionReturn(0); 2967 } 2968 2969 EXTERN_C_BEGIN 2970 #undef __FUNCT__ 2971 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2972 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2973 { 2974 Mat_MPIBAIJ *baij; 2975 2976 PetscFunctionBegin; 2977 baij = (Mat_MPIBAIJ*)mat->data; 2978 baij->ht_fact = fact; 2979 PetscFunctionReturn(0); 2980 } 2981 EXTERN_C_END 2982 2983 #undef __FUNCT__ 2984 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2985 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2986 { 2987 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2988 PetscFunctionBegin; 2989 *Ad = a->A; 2990 *Ao = a->B; 2991 *colmap = a->garray; 2992 PetscFunctionReturn(0); 2993 } 2994
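
/*
   Minimal usage sketch for MatMPIBAIJGetSeqBAIJ() above (added for illustration;
   the variable names are placeholders):

     Mat      Ad,Ao;
     PetscInt *garray;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Ad is the local "diagonal" SeqBAIJ block, Ao is the "off-diagonal" block, and
   garray[j] is the global block column corresponding to local block column j of Ao.
   The returned matrices are the ones stored inside the MPIBAIJ matrix itself, so the
   caller must not destroy them.
*/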