1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 PetscErrorCode ierr; 43 PetscInt i; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 52 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 53 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 55 56 for (i=0; i<A->rmap.n; i++){ 57 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 58 } 59 60 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 61 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 63 64 PetscFunctionReturn(0); 65 } 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ 
"MatStoreValues_MPIBAIJ" 70 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 PetscErrorCode ierr; 110 PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 120 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 154 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 155 N = nrow++ - 1; \ 156 /* shift up all the later entries in this row */ \ 157 for (ii=N; ii>=_i; ii--) { \ 158 rp[ii+1] = rp[ii]; \ 159 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 160 } \ 161 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 162 rp[_i] = bcol; \ 163 ap[bs2*_i + bs*cidx + ridx] = value; \ 164 a_noinsert:; \ 165 ailen[brow] = nrow; \ 166 } 167 168 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 169 { \ 170 brow = row/bs; \ 171 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 172 rmax = bimax[brow]; nrow = bilen[brow]; \ 173 bcol = col/bs; \ 174 ridx 
= row % bs; cidx = col % bs; \ 175 low = 0; high = nrow; \ 176 while (high-low > 3) { \ 177 t = (low+high)/2; \ 178 if (rp[t] > bcol) high = t; \ 179 else low = t; \ 180 } \ 181 for (_i=low; _i<high; _i++) { \ 182 if (rp[_i] > bcol) break; \ 183 if (rp[_i] == bcol) { \ 184 bap = ap + bs2*_i + bs*cidx + ridx; \ 185 if (addv == ADD_VALUES) *bap += value; \ 186 else *bap = value; \ 187 goto b_noinsert; \ 188 } \ 189 } \ 190 if (b->nonew == 1) goto b_noinsert; \ 191 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 192 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 193 CHKMEMQ;\ 194 N = nrow++ - 1; \ 195 /* shift up all the later entries in this row */ \ 196 for (ii=N; ii>=_i; ii--) { \ 197 rp[ii+1] = rp[ii]; \ 198 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 } \ 200 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 201 rp[_i] = bcol; \ 202 ap[bs2*_i + bs*cidx + ridx] = value; \ 203 b_noinsert:; \ 204 bilen[brow] = nrow; \ 205 } 206 207 #if defined(PETSC_USE_MAT_SINGLE) 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatSetValues_MPIBAIJ" 210 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 211 { 212 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 213 PetscErrorCode ierr; 214 PetscInt i,N = m*n; 215 MatScalar *vsingle; 216 217 PetscFunctionBegin; 218 if (N > b->setvalueslen) { 219 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 220 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 221 b->setvalueslen = N; 222 } 223 vsingle = b->setvaluescopy; 224 225 for (i=0; i<N; i++) { 226 vsingle[i] = v[i]; 227 } 228 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 229 PetscFunctionReturn(0); 230 } 231 232 #undef __FUNCT__ 233 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 234 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 235 { 236 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 237 PetscErrorCode ierr; 238 PetscInt i,N = m*n*b->bs2; 239 MatScalar *vsingle; 240 241 PetscFunctionBegin; 242 if (N > b->setvalueslen) { 243 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 244 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 245 b->setvalueslen = N; 246 } 247 vsingle = b->setvaluescopy; 248 for (i=0; i<N; i++) { 249 vsingle[i] = v[i]; 250 } 251 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 257 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 258 { 259 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 260 PetscErrorCode ierr; 261 PetscInt i,N = m*n; 262 MatScalar *vsingle; 263 264 PetscFunctionBegin; 265 if (N > b->setvalueslen) { 266 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 267 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 268 b->setvalueslen = N; 269 } 270 vsingle = b->setvaluescopy; 271 for (i=0; i<N; i++) { 272 vsingle[i] = v[i]; 273 } 274 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 
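
/*
   Note: the PETSC_USE_MAT_SINGLE wrappers in this block all follow the same pattern;
   a minimal sketch of it (illustrative only, using the Mat_MPIBAIJ fields seen above):

     if (N > b->setvalueslen) {                         grow the scratch buffer only when the request grows
       ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
       ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
       b->setvalueslen = N;
     }
     for (i=0; i<N; i++) b->setvaluescopy[i] = v[i];     convert PetscScalar entries to MatScalar (single precision)
     then forward to the corresponding ..._MatScalar() routine

   The only difference between the plain and blocked variants is N (m*n versus m*n*bs2).
*/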
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr =
MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 372 } 373 } 374 } else { 375 if (!baij->donotstash) { 376 if (roworiented) { 377 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 378 } else { 379 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 380 } 381 } 382 } 383 } 384 PetscFunctionReturn(0); 385 } 386 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 389 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 390 { 391 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 392 const MatScalar *value; 393 MatScalar *barray=baij->barray; 394 PetscTruth roworiented = baij->roworiented; 395 PetscErrorCode ierr; 396 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 397 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 398 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 399 400 PetscFunctionBegin; 401 if(!barray) { 402 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 403 baij->barray = barray; 404 } 405 406 if (roworiented) { 407 stepval = (n-1)*bs; 408 } else { 409 stepval = (m-1)*bs; 410 } 411 for (i=0; i<m; i++) { 412 if (im[i] < 0) continue; 413 #if defined(PETSC_USE_DEBUG) 414 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 415 #endif 416 if (im[i] >= rstart && im[i] < rend) { 417 row = im[i] - rstart; 418 for (j=0; j<n; j++) { 419 /* If NumCol = 1 then a copy is not required */ 420 if ((roworiented) && (n == 1)) { 421 barray = (MatScalar*)v + i*bs2; 422 } else if((!roworiented) && (m == 1)) { 423 barray = (MatScalar*)v + j*bs2; 424 } else { /* Here a copy is required */ 425 if (roworiented) { 426 value = v + i*(stepval+bs)*bs + j*bs; 427 } else { 428 value = v + j*(stepval+bs)*bs + i*bs; 429 } 430 for (ii=0; ii<bs; ii++,value+=stepval) { 431 for (jj=0; jj<bs; jj++) { 432 *barray++ = *value++; 433 } 434 } 435 barray -=bs2; 436 } 437 438 if (in[j] >= cstart && in[j] < cend){ 439 col = in[j] - cstart; 440 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 441 } 442 else if (in[j] < 0) continue; 443 #if defined(PETSC_USE_DEBUG) 444 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 445 #endif 446 else { 447 if (mat->was_assembled) { 448 if (!baij->colmap) { 449 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 450 } 451 452 #if defined(PETSC_USE_DEBUG) 453 #if defined (PETSC_USE_CTABLE) 454 { PetscInt data; 455 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 456 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 457 } 458 #else 459 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 460 #endif 461 #endif 462 #if defined (PETSC_USE_CTABLE) 463 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 464 col = (col - 1)/bs; 465 #else 466 col = (baij->colmap[in[j]] - 1)/bs; 467 #endif 468 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 469 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 470 col = in[j]; 471 } 472 } 473 else col = in[j]; 474 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 475 } 476 } 477 } else { 478 if (!baij->donotstash) { 479 if (roworiented) { 480 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 481 
} else { 482 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 483 } 484 } 485 } 486 } 487 PetscFunctionReturn(0); 488 } 489 490 #define HASH_KEY 0.6180339887 491 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 492 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 493 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 494 #undef __FUNCT__ 495 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 496 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 497 { 498 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 499 PetscTruth roworiented = baij->roworiented; 500 PetscErrorCode ierr; 501 PetscInt i,j,row,col; 502 PetscInt rstart_orig=mat->rmap.rstart; 503 PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 504 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 505 PetscReal tmp; 506 MatScalar **HD = baij->hd,value; 507 #if defined(PETSC_USE_DEBUG) 508 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 509 #endif 510 511 PetscFunctionBegin; 512 513 for (i=0; i<m; i++) { 514 #if defined(PETSC_USE_DEBUG) 515 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 516 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 517 #endif 518 row = im[i]; 519 if (row >= rstart_orig && row < rend_orig) { 520 for (j=0; j<n; j++) { 521 col = in[j]; 522 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 523 /* Look up PetscInto the Hash Table */ 524 key = (row/bs)*Nbs+(col/bs)+1; 525 h1 = HASH(size,key,tmp); 526 527 528 idx = h1; 529 #if defined(PETSC_USE_DEBUG) 530 insert_ct++; 531 total_ct++; 532 if (HT[idx] != key) { 533 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 534 if (idx == size) { 535 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 536 if (idx == h1) { 537 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 538 } 539 } 540 } 541 #else 542 if (HT[idx] != key) { 543 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 544 if (idx == size) { 545 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 546 if (idx == h1) { 547 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 548 } 549 } 550 } 551 #endif 552 /* A HASH table entry is found, so insert the values at the correct address */ 553 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 554 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 555 } 556 } else { 557 if (!baij->donotstash) { 558 if (roworiented) { 559 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 560 } else { 561 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 562 } 563 } 564 } 565 } 566 #if defined(PETSC_USE_DEBUG) 567 baij->ht_total_ct = total_ct; 568 baij->ht_insert_ct = insert_ct; 569 #endif 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 575 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 576 { 577 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 578 PetscTruth roworiented = baij->roworiented; 579 PetscErrorCode ierr; 580 PetscInt i,j,ii,jj,row,col; 581 PetscInt rstart=baij->rstartbs; 582 
PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2; 583 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 584 PetscReal tmp; 585 MatScalar **HD = baij->hd,*baij_a; 586 const MatScalar *v_t,*value; 587 #if defined(PETSC_USE_DEBUG) 588 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 589 #endif 590 591 PetscFunctionBegin; 592 593 if (roworiented) { 594 stepval = (n-1)*bs; 595 } else { 596 stepval = (m-1)*bs; 597 } 598 for (i=0; i<m; i++) { 599 #if defined(PETSC_USE_DEBUG) 600 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 601 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 602 #endif 603 row = im[i]; 604 v_t = v + i*bs2; 605 if (row >= rstart && row < rend) { 606 for (j=0; j<n; j++) { 607 col = in[j]; 608 609 /* Look up into the Hash Table */ 610 key = row*Nbs+col+1; 611 h1 = HASH(size,key,tmp); 612 613 idx = h1; 614 #if defined(PETSC_USE_DEBUG) 615 total_ct++; 616 insert_ct++; 617 if (HT[idx] != key) { 618 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 619 if (idx == size) { 620 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 621 if (idx == h1) { 622 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 623 } 624 } 625 } 626 #else 627 if (HT[idx] != key) { 628 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 629 if (idx == size) { 630 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 631 if (idx == h1) { 632 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 633 } 634 } 635 } 636 #endif 637 baij_a = HD[idx]; 638 if (roworiented) { 639 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 640 /* value = v + (i*(stepval+bs)+j)*bs; */ 641 value = v_t; 642 v_t += bs; 643 if (addv == ADD_VALUES) { 644 for (ii=0; ii<bs; ii++,value+=stepval) { 645 for (jj=ii; jj<bs2; jj+=bs) { 646 baij_a[jj] += *value++; 647 } 648 } 649 } else { 650 for (ii=0; ii<bs; ii++,value+=stepval) { 651 for (jj=ii; jj<bs2; jj+=bs) { 652 baij_a[jj] = *value++; 653 } 654 } 655 } 656 } else { 657 value = v + j*(stepval+bs)*bs + i*bs; 658 if (addv == ADD_VALUES) { 659 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 660 for (jj=0; jj<bs; jj++) { 661 baij_a[jj] += *value++; 662 } 663 } 664 } else { 665 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 666 for (jj=0; jj<bs; jj++) { 667 baij_a[jj] = *value++; 668 } 669 } 670 } 671 } 672 } 673 } else { 674 if (!baij->donotstash) { 675 if (roworiented) { 676 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 677 } else { 678 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 679 } 680 } 681 } 682 } 683 #if defined(PETSC_USE_DEBUG) 684 baij->ht_total_ct = total_ct; 685 baij->ht_insert_ct = insert_ct; 686 #endif 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatGetValues_MPIBAIJ" 692 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 693 { 694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 695 PetscErrorCode ierr; 696 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 697 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 698 699 PetscFunctionBegin; 700 for (i=0; i<m; i++) { 701 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 702 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row 
too large: row %D max %D",idxm[i],mat->rmap.N-1); 703 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 704 row = idxm[i] - bsrstart; 705 for (j=0; j<n; j++) { 706 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 707 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 708 if (idxn[j] >= bscstart && idxn[j] < bscend){ 709 col = idxn[j] - bscstart; 710 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 711 } else { 712 if (!baij->colmap) { 713 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 714 } 715 #if defined (PETSC_USE_CTABLE) 716 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 717 data --; 718 #else 719 data = baij->colmap[idxn[j]/bs]-1; 720 #endif 721 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 722 else { 723 col = data + idxn[j]%bs; 724 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 725 } 726 } 727 } 728 } else { 729 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 730 } 731 } 732 PetscFunctionReturn(0); 733 } 734 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatNorm_MPIBAIJ" 737 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 738 { 739 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 740 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 741 PetscErrorCode ierr; 742 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 743 PetscReal sum = 0.0; 744 MatScalar *v; 745 746 PetscFunctionBegin; 747 if (baij->size == 1) { 748 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 749 } else { 750 if (type == NORM_FROBENIUS) { 751 v = amat->a; 752 nz = amat->nz*bs2; 753 for (i=0; i<nz; i++) { 754 #if defined(PETSC_USE_COMPLEX) 755 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 756 #else 757 sum += (*v)*(*v); v++; 758 #endif 759 } 760 v = bmat->a; 761 nz = bmat->nz*bs2; 762 for (i=0; i<nz; i++) { 763 #if defined(PETSC_USE_COMPLEX) 764 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 765 #else 766 sum += (*v)*(*v); v++; 767 #endif 768 } 769 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 770 *nrm = sqrt(*nrm); 771 } else if (type == NORM_1) { /* max column sum */ 772 PetscReal *tmp,*tmp2; 773 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 774 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 775 tmp2 = tmp + mat->cmap.N; 776 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 777 v = amat->a; jj = amat->j; 778 for (i=0; i<amat->nz; i++) { 779 for (j=0; j<bs; j++){ 780 col = bs*(cstart + *jj) + j; /* column index */ 781 for (row=0; row<bs; row++){ 782 tmp[col] += PetscAbsScalar(*v); v++; 783 } 784 } 785 jj++; 786 } 787 v = bmat->a; jj = bmat->j; 788 for (i=0; i<bmat->nz; i++) { 789 for (j=0; j<bs; j++){ 790 col = bs*garray[*jj] + j; 791 for (row=0; row<bs; row++){ 792 tmp[col] += PetscAbsScalar(*v); v++; 793 } 794 } 795 jj++; 796 } 797 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 798 *nrm = 0.0; 799 for (j=0; j<mat->cmap.N; j++) { 800 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 801 } 802 ierr = PetscFree(tmp);CHKERRQ(ierr); 803 } else if (type == NORM_INFINITY) { /* max row sum */ 804 PetscReal *sums; 805 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 806 sum = 0.0; 807 for (j=0; j<amat->mbs; j++) { 808 for (row=0; row<bs; row++) sums[row] = 0.0; 809 v = amat->a + bs2*amat->i[j]; 810 nz = 
amat->i[j+1]-amat->i[j]; 811 for (i=0; i<nz; i++) { 812 for (col=0; col<bs; col++){ 813 for (row=0; row<bs; row++){ 814 sums[row] += PetscAbsScalar(*v); v++; 815 } 816 } 817 } 818 v = bmat->a + bs2*bmat->i[j]; 819 nz = bmat->i[j+1]-bmat->i[j]; 820 for (i=0; i<nz; i++) { 821 for (col=0; col<bs; col++){ 822 for (row=0; row<bs; row++){ 823 sums[row] += PetscAbsScalar(*v); v++; 824 } 825 } 826 } 827 for (row=0; row<bs; row++){ 828 if (sums[row] > sum) sum = sums[row]; 829 } 830 } 831 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 832 ierr = PetscFree(sums);CHKERRQ(ierr); 833 } else { 834 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 835 } 836 } 837 PetscFunctionReturn(0); 838 } 839 840 /* 841 Creates the hash table, and sets the table 842 This table is created only once. 843 If new entried need to be added to the matrix 844 then the hash table has to be destroyed and 845 recreated. 846 */ 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 849 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 850 { 851 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 852 Mat A = baij->A,B=baij->B; 853 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 854 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 855 PetscErrorCode ierr; 856 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 857 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 858 PetscInt *HT,key; 859 MatScalar **HD; 860 PetscReal tmp; 861 #if defined(PETSC_USE_INFO) 862 PetscInt ct=0,max=0; 863 #endif 864 865 PetscFunctionBegin; 866 baij->ht_size=(PetscInt)(factor*nz); 867 size = baij->ht_size; 868 869 if (baij->ht) { 870 PetscFunctionReturn(0); 871 } 872 873 /* Allocate Memory for Hash Table */ 874 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 875 baij->ht = (PetscInt*)(baij->hd + size); 876 HD = baij->hd; 877 HT = baij->ht; 878 879 880 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 881 882 883 /* Loop Over A */ 884 for (i=0; i<a->mbs; i++) { 885 for (j=ai[i]; j<ai[i+1]; j++) { 886 row = i+rstart; 887 col = aj[j]+cstart; 888 889 key = row*Nbs + col + 1; 890 h1 = HASH(size,key,tmp); 891 for (k=0; k<size; k++){ 892 if (!HT[(h1+k)%size]) { 893 HT[(h1+k)%size] = key; 894 HD[(h1+k)%size] = a->a + j*bs2; 895 break; 896 #if defined(PETSC_USE_INFO) 897 } else { 898 ct++; 899 #endif 900 } 901 } 902 #if defined(PETSC_USE_INFO) 903 if (k> max) max = k; 904 #endif 905 } 906 } 907 /* Loop Over B */ 908 for (i=0; i<b->mbs; i++) { 909 for (j=bi[i]; j<bi[i+1]; j++) { 910 row = i+rstart; 911 col = garray[bj[j]]; 912 key = row*Nbs + col + 1; 913 h1 = HASH(size,key,tmp); 914 for (k=0; k<size; k++){ 915 if (!HT[(h1+k)%size]) { 916 HT[(h1+k)%size] = key; 917 HD[(h1+k)%size] = b->a + j*bs2; 918 break; 919 #if defined(PETSC_USE_INFO) 920 } else { 921 ct++; 922 #endif 923 } 924 } 925 #if defined(PETSC_USE_INFO) 926 if (k> max) max = k; 927 #endif 928 } 929 } 930 931 /* Print Summary */ 932 #if defined(PETSC_USE_INFO) 933 for (i=0,j=0; i<size; i++) { 934 if (HT[i]) {j++;} 935 } 936 ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 937 #endif 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNCT__ 942 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 943 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 944 { 945 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 946 PetscErrorCode ierr; 947 PetscInt nstash,reallocs; 948 InsertMode addv; 949 950 PetscFunctionBegin; 951 if (baij->donotstash) { 952 PetscFunctionReturn(0); 953 } 954 955 /* make sure all processors are either in INSERTMODE or ADDMODE */ 956 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 957 if (addv == (ADD_VALUES|INSERT_VALUES)) { 958 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 959 } 960 mat->insertmode = addv; /* in case this processor had no cache */ 961 962 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 963 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 964 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 965 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 966 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 967 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 973 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 974 { 975 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 976 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 977 PetscErrorCode ierr; 978 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 979 PetscInt *row,*col,other_disassembled; 980 PetscTruth r1,r2,r3; 981 MatScalar *val; 982 InsertMode addv = mat->insertmode; 983 PetscMPIInt n; 984 985 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 986 PetscFunctionBegin; 987 if (!baij->donotstash) { 988 while (1) { 989 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 990 if (!flg) break; 991 992 for (i=0; i<n;) { 993 /* Now identify the consecutive vals belonging to the same row */ 994 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 995 if (j < n) ncols = j-i; 996 else ncols = n-i; 997 /* Now assemble all these values with a single function call */ 998 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 999 i = j; 1000 } 1001 } 1002 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1003 /* Now process the block-stash. 
Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr =
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1089 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1090 MatInfo info; 1091 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1092 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1093 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1094 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1095 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1096 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1097 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1098 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1099 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1100 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1101 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1104 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1107 PetscFunctionReturn(0); 1108 } 1109 } 1110 1111 if (isdraw) { 1112 PetscDraw draw; 1113 PetscTruth isnull; 1114 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1115 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1116 } 1117 1118 if (size == 1) { 1119 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1120 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1121 } else { 1122 /* assemble the entire matrix onto first processor. */ 1123 Mat A; 1124 Mat_SeqBAIJ *Aloc; 1125 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1126 MatScalar *a; 1127 1128 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1129 /* Perhaps this should be the type of mat? 
*/ 1130 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 1131 if (!rank) { 1132 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1133 } else { 1134 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1135 } 1136 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1137 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1138 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1139 1140 /* copy over the A part */ 1141 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1142 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1143 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1144 1145 for (i=0; i<mbs; i++) { 1146 rvals[0] = bs*(baij->rstartbs + i); 1147 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1148 for (j=ai[i]; j<ai[i+1]; j++) { 1149 col = (baij->cstartbs+aj[j])*bs; 1150 for (k=0; k<bs; k++) { 1151 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1152 col++; a += bs; 1153 } 1154 } 1155 } 1156 /* copy over the B part */ 1157 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1158 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1159 for (i=0; i<mbs; i++) { 1160 rvals[0] = bs*(baij->rstartbs + i); 1161 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1162 for (j=ai[i]; j<ai[i+1]; j++) { 1163 col = baij->garray[aj[j]]*bs; 1164 for (k=0; k<bs; k++) { 1165 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1166 col++; a += bs; 1167 } 1168 } 1169 } 1170 ierr = PetscFree(rvals);CHKERRQ(ierr); 1171 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1172 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1173 /* 1174 Everyone has to call to draw the matrix since the graphics waits are 1175 synchronized across all processors that share the PetscDraw object 1176 */ 1177 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1178 if (!rank) { 1179 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1180 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1181 } 1182 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1183 ierr = MatDestroy(A);CHKERRQ(ierr); 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatView_MPIBAIJ" 1190 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1191 { 1192 PetscErrorCode ierr; 1193 PetscTruth iascii,isdraw,issocket,isbinary; 1194 1195 PetscFunctionBegin; 1196 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1200 if (iascii || isdraw || issocket || isbinary) { 1201 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1202 } else { 1203 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1204 } 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1210 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1211 { 1212 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1213 PetscErrorCode ierr; 1214 1215 PetscFunctionBegin; 1216 #if defined(PETSC_USE_LOG) 1217 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 1218 #endif 1219 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1220 ierr = 
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

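  /*
     Outline (descriptive only; Ad denotes the local diagonal block a->A and Bo the
     off-diagonal block a->B):

        lvec = Bo^T * xx          off-diagonal contribution, computed into the ghost vector
        yy   = Ad^T * xx          local (diagonal-block) contribution
        yy  += reverse scatter of lvec with ADD_VALUES

     When the scatter's begin and end are merged, the local product must be formed before
     VecScatterBegin(); the two branches below handle the merged and unmerged cases.
  */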
  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but is perhaps not always true.
*/ 1338 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 /* 1343 This only works correctly for square matrices where the subblock A->A is the 1344 diagonal block 1345 */ 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1348 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1349 { 1350 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1355 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatScale_MPIBAIJ" 1361 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1362 { 1363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1368 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1374 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1375 { 1376 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1377 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1378 PetscErrorCode ierr; 1379 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1380 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1381 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1382 1383 PetscFunctionBegin; 1384 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1385 mat->getrowactive = PETSC_TRUE; 1386 1387 if (!mat->rowvalues && (idx || v)) { 1388 /* 1389 allocate enough space to hold information from the longest row. 
1390 */ 1391 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1392 PetscInt max = 1,mbs = mat->mbs,tmp; 1393 for (i=0; i<mbs; i++) { 1394 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1395 if (max < tmp) { max = tmp; } 1396 } 1397 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1398 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1399 } 1400 1401 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1402 lrow = row - brstart; 1403 1404 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1405 if (!v) {pvA = 0; pvB = 0;} 1406 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1407 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1408 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1409 nztot = nzA + nzB; 1410 1411 cmap = mat->garray; 1412 if (v || idx) { 1413 if (nztot) { 1414 /* Sort by increasing column numbers, assuming A and B already sorted */ 1415 PetscInt imark = -1; 1416 if (v) { 1417 *v = v_p = mat->rowvalues; 1418 for (i=0; i<nzB; i++) { 1419 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1420 else break; 1421 } 1422 imark = i; 1423 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1424 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1425 } 1426 if (idx) { 1427 *idx = idx_p = mat->rowindices; 1428 if (imark > -1) { 1429 for (i=0; i<imark; i++) { 1430 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1431 } 1432 } else { 1433 for (i=0; i<nzB; i++) { 1434 if (cmap[cworkB[i]/bs] < cstart) 1435 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1436 else break; 1437 } 1438 imark = i; 1439 } 1440 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1441 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1442 } 1443 } else { 1444 if (idx) *idx = 0; 1445 if (v) *v = 0; 1446 } 1447 } 1448 *nz = nztot; 1449 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1456 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1457 { 1458 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1459 1460 PetscFunctionBegin; 1461 if (!baij->getrowactive) { 1462 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1463 } 1464 baij->getrowactive = PETSC_FALSE; 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1470 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1471 { 1472 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1473 PetscErrorCode ierr; 1474 1475 PetscFunctionBegin; 1476 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1477 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1483 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1484 { 1485 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1486 Mat A = a->A,B = a->B; 1487 PetscErrorCode ierr; 1488 PetscReal isend[5],irecv[5]; 1489 1490 PetscFunctionBegin; 1491 info->block_size = (PetscReal)matin->rmap.bs; 1492 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1493 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1494 isend[3] = info->memory; isend[4] = info->mallocs; 1495 ierr = 
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1496 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1497 isend[3] += info->memory; isend[4] += info->mallocs; 1498 if (flag == MAT_LOCAL) { 1499 info->nz_used = isend[0]; 1500 info->nz_allocated = isend[1]; 1501 info->nz_unneeded = isend[2]; 1502 info->memory = isend[3]; 1503 info->mallocs = isend[4]; 1504 } else if (flag == MAT_GLOBAL_MAX) { 1505 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1506 info->nz_used = irecv[0]; 1507 info->nz_allocated = irecv[1]; 1508 info->nz_unneeded = irecv[2]; 1509 info->memory = irecv[3]; 1510 info->mallocs = irecv[4]; 1511 } else if (flag == MAT_GLOBAL_SUM) { 1512 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1513 info->nz_used = irecv[0]; 1514 info->nz_allocated = irecv[1]; 1515 info->nz_unneeded = irecv[2]; 1516 info->memory = irecv[3]; 1517 info->mallocs = irecv[4]; 1518 } else { 1519 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1520 } 1521 info->rows_global = (PetscReal)A->rmap.N; 1522 info->columns_global = (PetscReal)A->cmap.N; 1523 info->rows_local = (PetscReal)A->rmap.N; 1524 info->columns_local = (PetscReal)A->cmap.N; 1525 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1526 info->fill_ratio_needed = 0; 1527 info->factor_mallocs = 0; 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1533 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1534 { 1535 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1536 PetscErrorCode ierr; 1537 1538 PetscFunctionBegin; 1539 switch (op) { 1540 case MAT_NO_NEW_NONZERO_LOCATIONS: 1541 case MAT_YES_NEW_NONZERO_LOCATIONS: 1542 case MAT_COLUMNS_UNSORTED: 1543 case MAT_COLUMNS_SORTED: 1544 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1545 case MAT_KEEP_ZEROED_ROWS: 1546 case MAT_NEW_NONZERO_LOCATION_ERR: 1547 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1548 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1549 break; 1550 case MAT_ROW_ORIENTED: 1551 a->roworiented = PETSC_TRUE; 1552 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1553 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1554 break; 1555 case MAT_ROWS_SORTED: 1556 case MAT_ROWS_UNSORTED: 1557 case MAT_YES_NEW_DIAGONALS: 1558 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1559 break; 1560 case MAT_COLUMN_ORIENTED: 1561 a->roworiented = PETSC_FALSE; 1562 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1563 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1564 break; 1565 case MAT_IGNORE_OFF_PROC_ENTRIES: 1566 a->donotstash = PETSC_TRUE; 1567 break; 1568 case MAT_NO_NEW_DIAGONALS: 1569 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1570 case MAT_USE_HASH_TABLE: 1571 a->ht_flag = PETSC_TRUE; 1572 break; 1573 case MAT_SYMMETRIC: 1574 case MAT_STRUCTURALLY_SYMMETRIC: 1575 case MAT_HERMITIAN: 1576 case MAT_SYMMETRY_ETERNAL: 1577 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1578 break; 1579 case MAT_NOT_SYMMETRIC: 1580 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1581 case MAT_NOT_HERMITIAN: 1582 case MAT_NOT_SYMMETRY_ETERNAL: 1583 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1584 break; 1585 default: 1586 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1593 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1594 { 1595 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1596 Mat_SeqBAIJ 
*Aloc; 1597 Mat B; 1598 PetscErrorCode ierr; 1599 PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1600 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1601 MatScalar *a; 1602 1603 PetscFunctionBegin; 1604 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1605 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1606 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1607 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1608 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1609 1610 /* copy over the A part */ 1611 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1612 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1613 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1614 1615 for (i=0; i<mbs; i++) { 1616 rvals[0] = bs*(baij->rstartbs + i); 1617 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1618 for (j=ai[i]; j<ai[i+1]; j++) { 1619 col = (baij->cstartbs+aj[j])*bs; 1620 for (k=0; k<bs; k++) { 1621 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1622 col++; a += bs; 1623 } 1624 } 1625 } 1626 /* copy over the B part */ 1627 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1628 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1629 for (i=0; i<mbs; i++) { 1630 rvals[0] = bs*(baij->rstartbs + i); 1631 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1632 for (j=ai[i]; j<ai[i+1]; j++) { 1633 col = baij->garray[aj[j]]*bs; 1634 for (k=0; k<bs; k++) { 1635 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1636 col++; a += bs; 1637 } 1638 } 1639 } 1640 ierr = PetscFree(rvals);CHKERRQ(ierr); 1641 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 1644 if (matout) { 1645 *matout = B; 1646 } else { 1647 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1648 } 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1654 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1655 { 1656 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1657 Mat a = baij->A,b = baij->B; 1658 PetscErrorCode ierr; 1659 PetscInt s1,s2,s3; 1660 1661 PetscFunctionBegin; 1662 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1663 if (rr) { 1664 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1665 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1666 /* Overlap communication with computation. 
*/ 1667 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1668 } 1669 if (ll) { 1670 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1671 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1672 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1673 } 1674 /* scale the diagonal block */ 1675 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1676 1677 if (rr) { 1678 /* Do a scatter end and then right scale the off-diagonal block */ 1679 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1680 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1681 } 1682 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1688 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1689 { 1690 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1693 PetscInt i,*owners = A->rmap.range; 1694 PetscInt *nprocs,j,idx,nsends,row; 1695 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1696 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1697 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1698 MPI_Comm comm = A->comm; 1699 MPI_Request *send_waits,*recv_waits; 1700 MPI_Status recv_status,*send_status; 1701 #if defined(PETSC_DEBUG) 1702 PetscTruth found = PETSC_FALSE; 1703 #endif 1704 1705 PetscFunctionBegin; 1706 /* first count number of contributors to each processor */ 1707 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1708 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1709 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1710 j = 0; 1711 for (i=0; i<N; i++) { 1712 if (lastidx > (idx = rows[i])) j = 0; 1713 lastidx = idx; 1714 for (; j<size; j++) { 1715 if (idx >= owners[j] && idx < owners[j+1]) { 1716 nprocs[2*j]++; 1717 nprocs[2*j+1] = 1; 1718 owner[i] = j; 1719 #if defined(PETSC_DEBUG) 1720 found = PETSC_TRUE; 1721 #endif 1722 break; 1723 } 1724 } 1725 #if defined(PETSC_DEBUG) 1726 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1727 found = PETSC_FALSE; 1728 #endif 1729 } 1730 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1731 1732 /* inform other processors of number of messages and max length*/ 1733 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1734 1735 /* post receives: */ 1736 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1737 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1738 for (i=0; i<nrecvs; i++) { 1739 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1740 } 1741 1742 /* do sends: 1743 1) starts[i] gives the starting index in svalues for stuff going to 1744 the ith processor 1745 */ 1746 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1747 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1748 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1749 starts[0] = 0; 1750 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1751 for (i=0; i<N; i++) { 1752 svalues[starts[owner[i]]++] = rows[i]; 1753 } 1754 1755 starts[0] = 0; 1756 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1757 count = 0; 1758 for (i=0; i<size; i++) { 1759 if 
(nprocs[2*i+1]) { 1760 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1761 } 1762 } 1763 ierr = PetscFree(starts);CHKERRQ(ierr); 1764 1765 base = owners[rank]; 1766 1767 /* wait on receives */ 1768 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1769 source = lens + nrecvs; 1770 count = nrecvs; slen = 0; 1771 while (count) { 1772 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1773 /* unpack receives into our local space */ 1774 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1775 source[imdex] = recv_status.MPI_SOURCE; 1776 lens[imdex] = n; 1777 slen += n; 1778 count--; 1779 } 1780 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1781 1782 /* move the data into the send scatter */ 1783 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1784 count = 0; 1785 for (i=0; i<nrecvs; i++) { 1786 values = rvalues + i*nmax; 1787 for (j=0; j<lens[i]; j++) { 1788 lrows[count++] = values[j] - base; 1789 } 1790 } 1791 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1792 ierr = PetscFree(lens);CHKERRQ(ierr); 1793 ierr = PetscFree(owner);CHKERRQ(ierr); 1794 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1795 1796 /* actually zap the local rows */ 1797 /* 1798 Zero the required rows. If the "diagonal block" of the matrix 1799 is square and the user wishes to set the diagonal we use separate 1800 code so that MatSetValues() is not called for each diagonal allocating 1801 new memory, thus calling lots of mallocs and slowing things down. 1802 1803 Contributed by: Matthew Knepley 1804 */ 1805 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1806 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1807 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1808 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1809 } else if (diag != 0.0) { 1810 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1811 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1812 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1813 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1814 } 1815 for (i=0; i<slen; i++) { 1816 row = lrows[i] + rstart_bs; 1817 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1818 } 1819 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1821 } else { 1822 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1823 } 1824 1825 ierr = PetscFree(lrows);CHKERRQ(ierr); 1826 1827 /* wait on sends */ 1828 if (nsends) { 1829 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1830 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1831 ierr = PetscFree(send_status);CHKERRQ(ierr); 1832 } 1833 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1834 ierr = PetscFree(svalues);CHKERRQ(ierr); 1835 1836 PetscFunctionReturn(0); 1837 } 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1841 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1842 { 1843 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1844 PetscErrorCode ierr; 1845 1846 PetscFunctionBegin; 1847 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1848 PetscFunctionReturn(0); 1849 } 1850 1851 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1852 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatEqual_MPIBAIJ" 1855 PetscErrorCode 
MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1856 { 1857 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1858 Mat a,b,c,d; 1859 PetscTruth flg; 1860 PetscErrorCode ierr; 1861 1862 PetscFunctionBegin; 1863 a = matA->A; b = matA->B; 1864 c = matB->A; d = matB->B; 1865 1866 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1867 if (flg) { 1868 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1869 } 1870 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatCopy_MPIBAIJ" 1876 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1877 { 1878 PetscErrorCode ierr; 1879 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1880 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1881 1882 PetscFunctionBegin; 1883 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1884 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1885 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1886 } else { 1887 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1888 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1895 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1896 { 1897 PetscErrorCode ierr; 1898 1899 PetscFunctionBegin; 1900 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #include "petscblaslapack.h" 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1907 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1908 { 1909 PetscErrorCode ierr; 1910 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1911 PetscBLASInt bnz,one=1; 1912 Mat_SeqBAIJ *x,*y; 1913 1914 PetscFunctionBegin; 1915 if (str == SAME_NONZERO_PATTERN) { 1916 PetscScalar alpha = a; 1917 x = (Mat_SeqBAIJ *)xx->A->data; 1918 y = (Mat_SeqBAIJ *)yy->A->data; 1919 bnz = (PetscBLASInt)x->nz; 1920 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1921 x = (Mat_SeqBAIJ *)xx->B->data; 1922 y = (Mat_SeqBAIJ *)yy->B->data; 1923 bnz = (PetscBLASInt)x->nz; 1924 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1925 } else { 1926 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1927 } 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1933 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1934 { 1935 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1936 PetscErrorCode ierr; 1937 1938 PetscFunctionBegin; 1939 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1940 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1946 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1947 { 1948 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1949 PetscErrorCode ierr; 1950 1951 PetscFunctionBegin; 1952 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1953 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1954 PetscFunctionReturn(0); 1955 } 1956 1957 /* -------------------------------------------------------------------*/ 1958 static struct _MatOps MatOps_Values = { 1959 MatSetValues_MPIBAIJ, 1960 MatGetRow_MPIBAIJ, 1961 MatRestoreRow_MPIBAIJ, 1962 MatMult_MPIBAIJ, 1963 /* 4*/ MatMultAdd_MPIBAIJ, 1964 MatMultTranspose_MPIBAIJ, 1965 MatMultTransposeAdd_MPIBAIJ, 1966 0, 1967 0, 1968 0, 1969 /*10*/ 0, 1970 0, 1971 0, 1972 0, 1973 MatTranspose_MPIBAIJ, 
1974 /*15*/ MatGetInfo_MPIBAIJ, 1975 MatEqual_MPIBAIJ, 1976 MatGetDiagonal_MPIBAIJ, 1977 MatDiagonalScale_MPIBAIJ, 1978 MatNorm_MPIBAIJ, 1979 /*20*/ MatAssemblyBegin_MPIBAIJ, 1980 MatAssemblyEnd_MPIBAIJ, 1981 0, 1982 MatSetOption_MPIBAIJ, 1983 MatZeroEntries_MPIBAIJ, 1984 /*25*/ MatZeroRows_MPIBAIJ, 1985 0, 1986 0, 1987 0, 1988 0, 1989 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1990 0, 1991 0, 1992 0, 1993 0, 1994 /*35*/ MatDuplicate_MPIBAIJ, 1995 0, 1996 0, 1997 0, 1998 0, 1999 /*40*/ MatAXPY_MPIBAIJ, 2000 MatGetSubMatrices_MPIBAIJ, 2001 MatIncreaseOverlap_MPIBAIJ, 2002 MatGetValues_MPIBAIJ, 2003 MatCopy_MPIBAIJ, 2004 /*45*/ 0, 2005 MatScale_MPIBAIJ, 2006 0, 2007 0, 2008 0, 2009 /*50*/ 0, 2010 0, 2011 0, 2012 0, 2013 0, 2014 /*55*/ 0, 2015 0, 2016 MatSetUnfactored_MPIBAIJ, 2017 0, 2018 MatSetValuesBlocked_MPIBAIJ, 2019 /*60*/ 0, 2020 MatDestroy_MPIBAIJ, 2021 MatView_MPIBAIJ, 2022 0, 2023 0, 2024 /*65*/ 0, 2025 0, 2026 0, 2027 0, 2028 0, 2029 /*70*/ MatGetRowMax_MPIBAIJ, 2030 0, 2031 0, 2032 0, 2033 0, 2034 /*75*/ 0, 2035 0, 2036 0, 2037 0, 2038 0, 2039 /*80*/ 0, 2040 0, 2041 0, 2042 0, 2043 MatLoad_MPIBAIJ, 2044 /*85*/ 0, 2045 0, 2046 0, 2047 0, 2048 0, 2049 /*90*/ 0, 2050 0, 2051 0, 2052 0, 2053 0, 2054 /*95*/ 0, 2055 0, 2056 0, 2057 0, 2058 0, 2059 /*100*/0, 2060 0, 2061 0, 2062 0, 2063 0, 2064 /*105*/0, 2065 MatRealPart_MPIBAIJ, 2066 MatImaginaryPart_MPIBAIJ}; 2067 2068 2069 EXTERN_C_BEGIN 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2072 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2073 { 2074 PetscFunctionBegin; 2075 *a = ((Mat_MPIBAIJ *)A->data)->A; 2076 *iscopy = PETSC_FALSE; 2077 PetscFunctionReturn(0); 2078 } 2079 EXTERN_C_END 2080 2081 EXTERN_C_BEGIN 2082 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2083 EXTERN_C_END 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2087 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2088 { 2089 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2090 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2091 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2092 const PetscInt *JJ; 2093 PetscScalar *values; 2094 PetscErrorCode ierr; 2095 2096 PetscFunctionBegin; 2097 #if defined(PETSC_OPT_g) 2098 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2099 #endif 2100 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2101 o_nnz = d_nnz + m; 2102 2103 for (i=0; i<m; i++) { 2104 nnz = Ii[i+1]- Ii[i]; 2105 JJ = J + Ii[i]; 2106 nnz_max = PetscMax(nnz_max,nnz); 2107 #if defined(PETSC_OPT_g) 2108 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2109 #endif 2110 for (j=0; j<nnz; j++) { 2111 if (*JJ >= cstart) break; 2112 JJ++; 2113 } 2114 d = 0; 2115 for (; j<nnz; j++) { 2116 if (*JJ++ >= cend) break; 2117 d++; 2118 } 2119 d_nnz[i] = d; 2120 o_nnz[i] = nnz - d; 2121 } 2122 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2124 2125 if (v) values = (PetscScalar*)v; 2126 else { 2127 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2128 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2129 } 2130 2131 ierr = 
MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2132 for (i=0; i<m; i++) { 2133 ii = i + rstart; 2134 nnz = Ii[i+1]- Ii[i]; 2135 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2136 } 2137 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2138 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2139 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2140 2141 if (!v) { 2142 ierr = PetscFree(values);CHKERRQ(ierr); 2143 } 2144 PetscFunctionReturn(0); 2145 } 2146 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2149 /*@C 2150 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format 2151 (block compressed row). 2152 2153 Collective on MPI_Comm 2154 2155 Input Parameters: 2156 + A - the matrix . bs - the block size 2157 . i - the indices into j for the start of each local block row (starts with zero) 2158 . j - the block column indices for each local block row (starts with zero); these must be sorted for each row 2159 - v - optional values in the matrix 2160 2161 Level: developer 2162 2163 .keywords: matrix, aij, compressed row, sparse, parallel 2164 2165 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ 2166 @*/ 2167 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2168 { 2169 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2170 2171 PetscFunctionBegin; 2172 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2173 if (f) { 2174 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 2179 EXTERN_C_BEGIN 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2182 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2183 { 2184 Mat_MPIBAIJ *b; 2185 PetscErrorCode ierr; 2186 PetscInt i; 2187 2188 PetscFunctionBegin; 2189 B->preallocated = PETSC_TRUE; 2190 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2191 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2192 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2193 2194 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2195 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2196 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2197 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2198 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2199 2200 B->rmap.bs = bs; 2201 B->cmap.bs = bs; 2202 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2203 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2204 2205 if (d_nnz) { 2206 for (i=0; i<B->rmap.n/bs; i++) { 2207 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2208 } 2209 } 2210 if (o_nnz) { 2211 for (i=0; i<B->rmap.n/bs; i++) { 2212 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2213 } 2214 } 2215 2216 b =
(Mat_MPIBAIJ*)B->data; 2217 b->bs2 = bs*bs; 2218 b->mbs = B->rmap.n/bs; 2219 b->nbs = B->cmap.n/bs; 2220 b->Mbs = B->rmap.N/bs; 2221 b->Nbs = B->cmap.N/bs; 2222 2223 for (i=0; i<=b->size; i++) { 2224 b->rangebs[i] = B->rmap.range[i]/bs; 2225 } 2226 b->rstartbs = B->rmap.rstart/bs; 2227 b->rendbs = B->rmap.rend/bs; 2228 b->cstartbs = B->cmap.rstart/bs; 2229 b->cendbs = B->cmap.rend/bs; 2230 2231 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2232 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2233 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2234 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2235 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2236 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2237 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2238 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2239 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2240 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2241 2242 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2243 2244 PetscFunctionReturn(0); 2245 } 2246 EXTERN_C_END 2247 2248 EXTERN_C_BEGIN 2249 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2250 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2251 EXTERN_C_END 2252 2253 /*MC 2254 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2255 2256 Options Database Keys: 2257 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2258 . -mat_block_size <bs> - set the blocksize used to store the matrix 2259 - -mat_use_hash_table <fact> 2260 2261 Level: beginner 2262 2263 .seealso: MatCreateMPIBAIJ 2264 M*/ 2265 2266 EXTERN_C_BEGIN 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatCreate_MPIBAIJ" 2269 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2270 { 2271 Mat_MPIBAIJ *b; 2272 PetscErrorCode ierr; 2273 PetscTruth flg; 2274 2275 PetscFunctionBegin; 2276 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2277 B->data = (void*)b; 2278 2279 2280 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2281 B->mapping = 0; 2282 B->factor = 0; 2283 B->assembled = PETSC_FALSE; 2284 2285 B->insertmode = NOT_SET_VALUES; 2286 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2287 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2288 2289 /* build local table of row and column ownerships */ 2290 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2291 2292 /* build cache for off array entries formed */ 2293 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2294 b->donotstash = PETSC_FALSE; 2295 b->colmap = PETSC_NULL; 2296 b->garray = PETSC_NULL; 2297 b->roworiented = PETSC_TRUE; 2298 2299 #if defined(PETSC_USE_MAT_SINGLE) 2300 /* stuff for MatSetValues_XXX in single precision */ 2301 b->setvalueslen = 0; 2302 b->setvaluescopy = PETSC_NULL; 2303 #endif 2304 2305 /* stuff used in block assembly */ 2306 b->barray = 0; 2307 2308 /* stuff used for matrix vector multiply */ 2309 b->lvec = 0; 2310 b->Mvctx = 0; 2311 2312 /* stuff for MatGetRow() */ 2313 b->rowindices = 0; 2314 b->rowvalues = 0; 2315 b->getrowactive = PETSC_FALSE; 2316 2317 /* hash table stuff */ 2318 b->ht = 0; 2319 b->hd = 0; 2320 b->ht_size = 0; 2321 b->ht_flag = PETSC_FALSE; 2322 b->ht_fact = 0; 2323 b->ht_total_ct = 0; 2324 b->ht_insert_ct = 0; 2325 
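  /*
     Note: this constructor only fills in the generic MPIBAIJ data structure; the sequential
     diagonal block b->A and off-diagonal block b->B are created later, in
     MatMPIBAIJSetPreallocation_MPIBAIJ().  A typical calling sequence that reaches this point
     is the following sketch (illustrative only; error checking with CHKERRQ() omitted):

        MatCreate(PETSC_COMM_WORLD,&B);
        MatSetSizes(B,m,n,M,N);
        MatSetType(B,MATMPIBAIJ);                                  - dispatches to MatCreate_MPIBAIJ()
        MatMPIBAIJSetPreallocation(B,bs,d_nz,d_nnz,o_nz,o_nnz);    - creates b->A and b->B
  */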
2326 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2327 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2328 if (flg) { 2329 PetscReal fact = 1.39; 2330 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2331 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2332 if (fact <= 1.0) fact = 1.39; 2333 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2334 ierr = PetscInfo1(0,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr); 2335 } 2336 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2337 2338 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2339 "MatStoreValues_MPIBAIJ", 2340 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2341 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2342 "MatRetrieveValues_MPIBAIJ", 2343 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2344 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2345 "MatGetDiagonalBlock_MPIBAIJ", 2346 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2347 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2348 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2349 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2350 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2351 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2352 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2353 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2354 "MatDiagonalScaleLocal_MPIBAIJ", 2355 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2356 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2357 "MatSetHashTableFactor_MPIBAIJ", 2358 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2359 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2360 PetscFunctionReturn(0); 2361 } 2362 EXTERN_C_END 2363 2364 /*MC 2365 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2366 2367 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2368 and MATMPIBAIJ otherwise. 2369 2370 Options Database Keys: 2371 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2372 2373 Level: beginner 2374 2375 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2376 M*/ 2377 2378 EXTERN_C_BEGIN 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatCreate_BAIJ" 2381 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2382 { 2383 PetscErrorCode ierr; 2384 PetscMPIInt size; 2385 2386 PetscFunctionBegin; 2387 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2388 if (size == 1) { 2389 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2390 } else { 2391 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2392 } 2393 PetscFunctionReturn(0); 2394 } 2395 EXTERN_C_END 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2399 /*@C 2400 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2401 (block compressed row). For good matrix assembly performance 2402 the user should preallocate the matrix storage by setting the parameters 2403 d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately, 2404 performance can be increased by more than a factor of 50. 2405 2406 Collective on Mat 2407 2408 Input Parameters: 2409 + A - the matrix 2410 . bs - size of blocks 2411 . d_nz - number of block nonzeros per block row in diagonal portion of local 2412 submatrix (same for all local rows) 2413 . d_nnz - array containing the number of block nonzeros in the various block rows 2414 in the diagonal portion of the local submatrix (possibly different for each block 2415 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2416 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2417 submatrix (same for all local rows). 2418 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2419 off-diagonal portion of the local submatrix (possibly different for 2420 each block row) or PETSC_NULL. 2421 2422 If the *_nnz parameter is given then the *_nz parameter is ignored. 2423 2424 Options Database Keys: 2425 + -mat_block_size - size of the blocks to use 2426 - -mat_use_hash_table <fact> - use a hash table for matrix assembly, with the given size factor 2427 2428 Notes: 2429 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2430 then it must be used on all processors that share the object for that argument. 2431 2432 Storage Information: 2433 For a square global matrix we define each processor's diagonal portion 2434 to be its local rows and the corresponding columns (a square submatrix); 2435 each processor's off-diagonal portion encompasses the remainder of the 2436 local matrix (a rectangular submatrix). 2437 2438 The user can specify preallocated storage for the diagonal part of 2439 the local submatrix with either d_nz or d_nnz (not both). Set 2440 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2441 memory allocation. Likewise, specify preallocated storage for the 2442 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2443 2444 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2445 the figure below we depict these three local rows and all columns (0-11). 2446 2447 .vb 2448 0 1 2 3 4 5 6 7 8 9 10 11 2449 ------------------- 2450 row 3 | o o o d d d o o o o o o 2451 row 4 | o o o d d d o o o o o o 2452 row 5 | o o o d d d o o o o o o 2453 ------------------- 2454 .ve 2455 2456 Thus, any entries in the d locations are stored in the d (diagonal) 2457 submatrix, and any entries in the o locations are stored in the 2458 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2459 stored simply in the MATSEQBAIJ format for compressed row storage. 2460 2461 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2462 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2463 In general, for PDE problems in which most nonzeros are near the diagonal, 2464 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2465 or you will get TERRIBLE performance; see the users' manual chapter on 2466 matrices.
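   Example usage (an illustrative sketch only; the sizes and fill estimates below are made up,
   and error checking with CHKERRQ() is omitted): a matrix with block size 2 and 4 block rows
   per process, where each block row is expected to hold about 3 blocks in the diagonal portion
   and 1 block in the off-diagonal portion, could be preallocated as

.vb
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,8,8,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIBAIJ);
      MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);
.ve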
2467 2468 Level: intermediate 2469 2470 .keywords: matrix, block, aij, compressed row, sparse, parallel 2471 2472 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2473 @*/ 2474 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2475 { 2476 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2480 if (f) { 2481 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2482 } 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatCreateMPIBAIJ" 2488 /*@C 2489 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2490 (block compressed row). For good matrix assembly performance 2491 the user should preallocate the matrix storage by setting the parameters 2492 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2493 performance can be increased by more than a factor of 50. 2494 2495 Collective on MPI_Comm 2496 2497 Input Parameters: 2498 + comm - MPI communicator 2499 . bs - size of blocks 2500 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2501 This value should be the same as the local size used in creating the 2502 y vector for the matrix-vector product y = Ax. 2503 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2504 This value should be the same as the local size used in creating the 2505 x vector for the matrix-vector product y = Ax. 2506 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2507 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2508 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2509 submatrix (same for all local rows) 2510 . d_nnz - array containing the number of nonzero blocks in the various block rows 2511 in the diagonal portion of the local submatrix (possibly different for each block 2512 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2513 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2514 submatrix (same for all local rows). 2515 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2516 off-diagonal portion of the local submatrix (possibly different for 2517 each block row) or PETSC_NULL. 2518 2519 Output Parameter: 2520 . A - the matrix 2521 2522 Options Database Keys: 2523 + -mat_block_size - size of the blocks to use 2524 - -mat_use_hash_table <fact> - use a hash table for matrix assembly, with the given size factor 2525 2526 Notes: 2527 If the *_nnz parameter is given then the *_nz parameter is ignored. 2528 2529 A nonzero block is any block that has 1 or more nonzeros in it. 2530 2531 The user MUST specify either the local or global matrix dimensions 2532 (possibly both). 2533 2534 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2535 then it must be used on all processors that share the object for that argument. 2536 2537 Storage Information: 2538 For a square global matrix we define each processor's diagonal portion 2539 to be its local rows and the corresponding columns (a square submatrix); 2540 each processor's off-diagonal portion encompasses the remainder of the 2541 local matrix (a rectangular submatrix).
2542 2543 The user can specify preallocated storage for the diagonal part of 2544 the local submatrix with either d_nz or d_nnz (not both). Set 2545 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2546 memory allocation. Likewise, specify preallocated storage for the 2547 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2548 2549 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2550 the figure below we depict these three local rows and all columns (0-11). 2551 2552 .vb 2553 0 1 2 3 4 5 6 7 8 9 10 11 2554 ------------------- 2555 row 3 | o o o d d d o o o o o o 2556 row 4 | o o o d d d o o o o o o 2557 row 5 | o o o d d d o o o o o o 2558 ------------------- 2559 .ve 2560 2561 Thus, any entries in the d locations are stored in the d (diagonal) 2562 submatrix, and any entries in the o locations are stored in the 2563 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2564 stored simply in the MATSEQBAIJ format for compressed row storage. 2565 2566 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2567 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2568 In general, for PDE problems in which most nonzeros are near the diagonal, 2569 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2570 or you will get TERRIBLE performance; see the users' manual chapter on 2571 matrices. 2572 2573 Level: intermediate 2574 2575 .keywords: matrix, block, aij, compressed row, sparse, parallel 2576 2577 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2578 @*/ 2579 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2580 { 2581 PetscErrorCode ierr; 2582 PetscMPIInt size; 2583 2584 PetscFunctionBegin; 2585 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2586 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2587 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2588 if (size > 1) { 2589 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2590 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2591 } else { 2592 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2593 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2594 } 2595 PetscFunctionReturn(0); 2596 } 2597 2598 #undef __FUNCT__ 2599 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2600 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2601 { 2602 Mat mat; 2603 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2604 PetscErrorCode ierr; 2605 PetscInt len=0; 2606 2607 PetscFunctionBegin; 2608 *newmat = 0; 2609 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2610 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2611 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2612 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2613 2614 mat->factor = matin->factor; 2615 mat->preallocated = PETSC_TRUE; 2616 mat->assembled = PETSC_TRUE; 2617 mat->insertmode = NOT_SET_VALUES; 2618 2619 a = (Mat_MPIBAIJ*)mat->data; 2620 mat->rmap.bs = matin->rmap.bs; 2621 a->bs2 = oldmat->bs2; 2622 a->mbs = oldmat->mbs; 2623 a->nbs = oldmat->nbs; 2624 a->Mbs = oldmat->Mbs; 2625 a->Nbs = oldmat->Nbs; 2626 2627 ierr = 
PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2628 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2629 2630 a->size = oldmat->size; 2631 a->rank = oldmat->rank; 2632 a->donotstash = oldmat->donotstash; 2633 a->roworiented = oldmat->roworiented; 2634 a->rowindices = 0; 2635 a->rowvalues = 0; 2636 a->getrowactive = PETSC_FALSE; 2637 a->barray = 0; 2638 a->rstartbs = oldmat->rstartbs; 2639 a->rendbs = oldmat->rendbs; 2640 a->cstartbs = oldmat->cstartbs; 2641 a->cendbs = oldmat->cendbs; 2642 2643 /* hash table stuff */ 2644 a->ht = 0; 2645 a->hd = 0; 2646 a->ht_size = 0; 2647 a->ht_flag = oldmat->ht_flag; 2648 a->ht_fact = oldmat->ht_fact; 2649 a->ht_total_ct = 0; 2650 a->ht_insert_ct = 0; 2651 2652 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2653 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2654 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2655 if (oldmat->colmap) { 2656 #if defined (PETSC_USE_CTABLE) 2657 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2658 #else 2659 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2660 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2661 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2662 #endif 2663 } else a->colmap = 0; 2664 2665 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2666 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2667 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2668 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2669 } else a->garray = 0; 2670 2671 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2672 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2673 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2674 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2675 2676 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2677 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2678 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2679 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2680 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2681 *newmat = mat; 2682 2683 PetscFunctionReturn(0); 2684 } 2685 2686 #include "petscsys.h" 2687 2688 #undef __FUNCT__ 2689 #define __FUNCT__ "MatLoad_MPIBAIJ" 2690 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2691 { 2692 Mat A; 2693 PetscErrorCode ierr; 2694 int fd; 2695 PetscInt i,nz,j,rstart,rend; 2696 PetscScalar *vals,*buf; 2697 MPI_Comm comm = ((PetscObject)viewer)->comm; 2698 MPI_Status status; 2699 PetscMPIInt rank,size,maxnz; 2700 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2701 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2702 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2703 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2704 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2705 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2706 2707 PetscFunctionBegin; 2708 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 2709 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2710 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2711 2712 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2713 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2714 if (!rank) { 2715 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2716 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2717 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2718 } 2719 2720 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2721 M = header[1]; N = header[2]; 2722 2723 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2724 2725 /* 2726 This code adds extra rows to make sure the number of rows is 2727 divisible by the blocksize 2728 */ 2729 Mbs = M/bs; 2730 extra_rows = bs - M + bs*Mbs; 2731 if (extra_rows == bs) extra_rows = 0; 2732 else Mbs++; 2733 if (extra_rows && !rank) { 2734 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2735 } 2736 2737 /* determine ownership of all rows */ 2738 mbs = Mbs/size + ((Mbs % size) > rank); 2739 m = mbs*bs; 2740 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2741 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2742 2743 /* process 0 needs enough room for process with most rows */ 2744 if (!rank) { 2745 mmax = rowners[1]; 2746 for (i=2; i<size; i++) { 2747 mmax = PetscMax(mmax,rowners[i]); 2748 } 2749 mmax*=bs; 2750 } else mmax = m; 2751 2752 rowners[0] = 0; 2753 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2754 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2755 rstart = rowners[rank]; 2756 rend = rowners[rank+1]; 2757 2758 /* distribute row lengths to all processors */ 2759 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2760 if (!rank) { 2761 mend = m; 2762 if (size == 1) mend = mend - extra_rows; 2763 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2764 for (j=mend; j<m; j++) locrowlens[j] = 1; 2765 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2766 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2767 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2768 for (j=0; j<m; j++) { 2769 procsnz[0] += locrowlens[j]; 2770 } 2771 for (i=1; i<size; i++) { 2772 mend = browners[i+1] - browners[i]; 2773 if (i == size-1) mend = mend - extra_rows; 2774 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2775 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2776 /* calculate the number of nonzeros on each processor */ 2777 for (j=0; j<browners[i+1]-browners[i]; j++) { 2778 procsnz[i] += rowlengths[j]; 2779 } 2780 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2781 } 2782 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2783 } else { 2784 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2785 } 2786 2787 if (!rank) { 2788 /* determine max buffer needed and allocate it */ 2789 maxnz = procsnz[0]; 2790 for (i=1; i<size; i++) { 2791 maxnz = PetscMax(maxnz,procsnz[i]); 2792 } 2793 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2794 2795 /* read in my part of the matrix column indices */ 2796 nz = procsnz[0]; 2797 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2798 mycols = ibuf; 2799 if (size == 1) nz -= extra_rows; 2800 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2801 if (size == 1) for 
(i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2802 2803 /* read in every ones (except the last) and ship off */ 2804 for (i=1; i<size-1; i++) { 2805 nz = procsnz[i]; 2806 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2807 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2808 } 2809 /* read in the stuff for the last proc */ 2810 if (size != 1) { 2811 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2812 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2813 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2814 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2815 } 2816 ierr = PetscFree(cols);CHKERRQ(ierr); 2817 } else { 2818 /* determine buffer space needed for message */ 2819 nz = 0; 2820 for (i=0; i<m; i++) { 2821 nz += locrowlens[i]; 2822 } 2823 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2824 mycols = ibuf; 2825 /* receive message of column indices*/ 2826 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2827 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2828 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2829 } 2830 2831 /* loop over local rows, determining number of off diagonal entries */ 2832 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2833 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2834 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2835 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2836 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2837 rowcount = 0; nzcount = 0; 2838 for (i=0; i<mbs; i++) { 2839 dcount = 0; 2840 odcount = 0; 2841 for (j=0; j<bs; j++) { 2842 kmax = locrowlens[rowcount]; 2843 for (k=0; k<kmax; k++) { 2844 tmp = mycols[nzcount++]/bs; 2845 if (!mask[tmp]) { 2846 mask[tmp] = 1; 2847 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2848 else masked1[dcount++] = tmp; 2849 } 2850 } 2851 rowcount++; 2852 } 2853 2854 dlens[i] = dcount; 2855 odlens[i] = odcount; 2856 2857 /* zero out the mask elements we set */ 2858 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2859 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2860 } 2861 2862 /* create our matrix */ 2863 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2864 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2865 ierr = MatSetType(A,type);CHKERRQ(ierr) 2866 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2867 2868 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2869 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2870 2871 if (!rank) { 2872 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2873 /* read in my part of the matrix numerical values */ 2874 nz = procsnz[0]; 2875 vals = buf; 2876 mycols = ibuf; 2877 if (size == 1) nz -= extra_rows; 2878 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2879 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2880 2881 /* insert into matrix */ 2882 jj = rstart*bs; 2883 for (i=0; i<m; i++) { 2884 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2885 mycols += locrowlens[i]; 2886 vals += locrowlens[i]; 2887 jj++; 2888 } 2889 /* read in other processors (except the last one) and ship out */ 2890 for (i=1; i<size-1; i++) { 2891 nz = procsnz[i]; 2892 vals = buf; 
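      /* read the numerical values destined for process i from the file and ship them off;
         the last process is handled separately below because its padded rows (extra_rows)
         are not stored in the file */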
2893 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2894 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2895 } 2896 /* the last proc */ 2897 if (size != 1){ 2898 nz = procsnz[i] - extra_rows; 2899 vals = buf; 2900 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2901 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2902 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2903 } 2904 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2905 } else { 2906 /* receive numeric values */ 2907 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2908 2909 /* receive message of values*/ 2910 vals = buf; 2911 mycols = ibuf; 2912 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2913 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2914 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2915 2916 /* insert into matrix */ 2917 jj = rstart*bs; 2918 for (i=0; i<m; i++) { 2919 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2920 mycols += locrowlens[i]; 2921 vals += locrowlens[i]; 2922 jj++; 2923 } 2924 } 2925 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2926 ierr = PetscFree(buf);CHKERRQ(ierr); 2927 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2928 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2929 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2930 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2931 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2932 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2933 2934 *newmat = A; 2935 PetscFunctionReturn(0); 2936 } 2937 2938 #undef __FUNCT__ 2939 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2940 /*@ 2941 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2942 2943 Input Parameters: 2944 . mat - the matrix 2945 . fact - factor 2946 2947 Collective on Mat 2948 2949 Level: advanced 2950 2951 Notes: 2952 This can also be set by the command line option: -mat_use_hash_table <fact> 2953 2954 .keywords: matrix, hashtable, factor, HT 2955 2956 .seealso: MatSetOption() 2957 @*/ 2958 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2959 { 2960 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2961 2962 PetscFunctionBegin; 2963 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2964 if (f) { 2965 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2966 } 2967 PetscFunctionReturn(0); 2968 } 2969 2970 EXTERN_C_BEGIN 2971 #undef __FUNCT__ 2972 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2973 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2974 { 2975 Mat_MPIBAIJ *baij; 2976 2977 PetscFunctionBegin; 2978 baij = (Mat_MPIBAIJ*)mat->data; 2979 baij->ht_fact = fact; 2980 PetscFunctionReturn(0); 2981 } 2982 EXTERN_C_END 2983 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2986 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2987 { 2988 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2989 PetscFunctionBegin; 2990 *Ad = a->A; 2991 *Ao = a->B; 2992 *colmap = a->garray; 2993 PetscFunctionReturn(0); 2994 } 2995
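/*
   Usage sketch (illustrative only, not compiled as part of the library): assemble a small
   MPIBAIJ matrix with block size 2, one diagonal block per process, and then look at its
   local blocks.  The preallocation numbers are made up and error checking with CHKERRQ()
   is omitted for brevity.

      Mat          A,Ad,Ao;
      PetscInt     *garray,row,col;
      PetscMPIInt  rank;
      PetscScalar  vals[4] = {1.0,0.0,0.0,1.0};

      MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
      MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,2,2,PETSC_DETERMINE,PETSC_DETERMINE,1,PETSC_NULL,0,PETSC_NULL,&A);
      row = rank; col = rank;
      MatSetValuesBlocked(A,1,&row,1,&col,vals,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

      MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);
      MatDestroy(A);
*/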