#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*
   UGLY, ugly, ugly
   When MatScalar == PetscScalar, the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist (the name is just a #define alias below).  Otherwise ..._MatScalar() takes matrix
   elements already in single precision and inserts them into the single-precision data
   structure, while MatSetValuesBlocked_MPIBAIJ() converts the entries to single precision
   and then calls ..._MatScalar() to put them into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar    MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar           MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar    MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar        MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
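/*
   The two routines below are composed on the matrix as "MatStoreValues_C" and
   "MatRetrieveValues_C"; each simply delegates to the sequential diagonal (A) and
   off-diagonal (B) blocks.  A rough usage sketch from application code (an assumption,
   not taken from this file: an assembled MATMPIBAIJ matrix named jac whose nonzero
   pattern stays fixed between solves, with the option call reflecting the usual
   MatStoreValues() requirement):

       ierr = MatSetOption(jac,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
       ierr = MatStoreValues(jac);CHKERRQ(ierr);      /* save the current values       */
       /* ... overwrite/add the varying entries, reassemble, solve ...                 */
       ierr = MatRetrieveValues(jac);CHKERRQ(ierr);   /* restore the saved copy        */
*/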
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}
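/*
   Note on the two insertion macros (MatSetValues_SeqBAIJ_A_Private above and the
   _B_Private variant below): each inserts one scalar at position (row,col) of a
   blocked CSR structure with bs x bs blocks.  The block row/column are row/bs and
   col/bs, the entry sits inside its block at column-major offset bs*cidx + ridx,
   and a short binary search followed by a linear scan locates the block column.
   Worked example: with bs = 3, (row,col) = (7,5) lands in block (2,1) at
   (ridx,cidx) = (1,2), i.e. at ap[9*_i + 3*2 + 1] once the block slot _i for block
   column 1 has been found (or created via MatSeqXAIJReallocateAIJ()).
*/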
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew); \
    CHKMEMQ; \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
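/*
   The blocked hash-table wrapper that follows mirrors MatSetValues_MPIBAIJ_HT()
   above: when PETSC_USE_MAT_SINGLE is defined it copies the m*n*bs2 incoming
   PetscScalar entries (one bs x bs block per (im[i],in[j]) pair) into the reusable
   MatScalar buffer setvaluescopy before calling the ..._MatScalar() version.
*/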
279 #undef __FUNCT__ 280 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 281 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 282 { 283 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 284 PetscErrorCode ierr; 285 PetscInt i,N = m*n*b->bs2; 286 MatScalar *vsingle; 287 288 PetscFunctionBegin; 289 if (N > b->setvalueslen) { 290 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 291 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 292 b->setvalueslen = N; 293 } 294 vsingle = b->setvaluescopy; 295 for (i=0; i<N; i++) { 296 vsingle[i] = v[i]; 297 } 298 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 #endif 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 305 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 306 { 307 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 308 MatScalar value; 309 PetscTruth roworiented = baij->roworiented; 310 PetscErrorCode ierr; 311 PetscInt i,j,row,col; 312 PetscInt rstart_orig=mat->rmap.rstart; 313 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 314 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 315 316 /* Some Variables required in the macro */ 317 Mat A = baij->A; 318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 319 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 320 MatScalar *aa=a->a; 321 322 Mat B = baij->B; 323 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 324 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 325 MatScalar *ba=b->a; 326 327 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 328 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 329 MatScalar *ap,*bap; 330 331 PetscFunctionBegin; 332 for (i=0; i<m; i++) { 333 if (im[i] < 0) continue; 334 #if defined(PETSC_USE_DEBUG) 335 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 336 #endif 337 if (im[i] >= rstart_orig && im[i] < rend_orig) { 338 row = im[i] - rstart_orig; 339 for (j=0; j<n; j++) { 340 if (in[j] >= cstart_orig && in[j] < cend_orig){ 341 col = in[j] - cstart_orig; 342 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 343 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 344 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 345 } else if (in[j] < 0) continue; 346 #if defined(PETSC_USE_DEBUG) 347 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);} 348 #endif 349 else { 350 if (mat->was_assembled) { 351 if (!baij->colmap) { 352 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 353 } 354 #if defined (PETSC_USE_CTABLE) 355 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 356 col = col - 1; 357 #else 358 col = baij->colmap[in[j]/bs] - 1; 359 #endif 360 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 361 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 362 col = in[j]; 363 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 364 B = baij->B; 365 b = (Mat_SeqBAIJ*)(B)->data; 366 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 367 ba=b->a; 368 } else col += in[j]%bs; 369 } else col = in[j]; 370 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 371 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 372 /* ierr = 
MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 373 } 374 } 375 } else { 376 if (!baij->donotstash) { 377 if (roworiented) { 378 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 379 } else { 380 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 381 } 382 } 383 } 384 } 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 391 { 392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 393 const MatScalar *value; 394 MatScalar *barray=baij->barray; 395 PetscTruth roworiented = baij->roworiented; 396 PetscErrorCode ierr; 397 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 398 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 399 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 400 401 PetscFunctionBegin; 402 if(!barray) { 403 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 404 baij->barray = barray; 405 } 406 407 if (roworiented) { 408 stepval = (n-1)*bs; 409 } else { 410 stepval = (m-1)*bs; 411 } 412 for (i=0; i<m; i++) { 413 if (im[i] < 0) continue; 414 #if defined(PETSC_USE_DEBUG) 415 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 416 #endif 417 if (im[i] >= rstart && im[i] < rend) { 418 row = im[i] - rstart; 419 for (j=0; j<n; j++) { 420 /* If NumCol = 1 then a copy is not required */ 421 if ((roworiented) && (n == 1)) { 422 barray = (MatScalar*)v + i*bs2; 423 } else if((!roworiented) && (m == 1)) { 424 barray = (MatScalar*)v + j*bs2; 425 } else { /* Here a copy is required */ 426 if (roworiented) { 427 value = v + i*(stepval+bs)*bs + j*bs; 428 } else { 429 value = v + j*(stepval+bs)*bs + i*bs; 430 } 431 for (ii=0; ii<bs; ii++,value+=stepval) { 432 for (jj=0; jj<bs; jj++) { 433 *barray++ = *value++; 434 } 435 } 436 barray -=bs2; 437 } 438 439 if (in[j] >= cstart && in[j] < cend){ 440 col = in[j] - cstart; 441 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 442 } 443 else if (in[j] < 0) continue; 444 #if defined(PETSC_USE_DEBUG) 445 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 446 #endif 447 else { 448 if (mat->was_assembled) { 449 if (!baij->colmap) { 450 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 451 } 452 453 #if defined(PETSC_USE_DEBUG) 454 #if defined (PETSC_USE_CTABLE) 455 { PetscInt data; 456 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 457 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 458 } 459 #else 460 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461 #endif 462 #endif 463 #if defined (PETSC_USE_CTABLE) 464 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 465 col = (col - 1)/bs; 466 #else 467 col = (baij->colmap[in[j]] - 1)/bs; 468 #endif 469 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 470 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 471 col = in[j]; 472 } 473 } 474 else col = in[j]; 475 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 476 } 477 } 478 } else { 479 if (!baij->donotstash) { 480 if (roworiented) { 481 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 482 
} else { 483 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 484 } 485 } 486 } 487 } 488 PetscFunctionReturn(0); 489 } 490 491 #define HASH_KEY 0.6180339887 492 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 493 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 494 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 497 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 498 { 499 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 500 PetscTruth roworiented = baij->roworiented; 501 PetscErrorCode ierr; 502 PetscInt i,j,row,col; 503 PetscInt rstart_orig=mat->rmap.rstart; 504 PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 505 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 506 PetscReal tmp; 507 MatScalar **HD = baij->hd,value; 508 #if defined(PETSC_USE_DEBUG) 509 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 510 #endif 511 512 PetscFunctionBegin; 513 514 for (i=0; i<m; i++) { 515 #if defined(PETSC_USE_DEBUG) 516 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 517 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 518 #endif 519 row = im[i]; 520 if (row >= rstart_orig && row < rend_orig) { 521 for (j=0; j<n; j++) { 522 col = in[j]; 523 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 524 /* Look up PetscInto the Hash Table */ 525 key = (row/bs)*Nbs+(col/bs)+1; 526 h1 = HASH(size,key,tmp); 527 528 529 idx = h1; 530 #if defined(PETSC_USE_DEBUG) 531 insert_ct++; 532 total_ct++; 533 if (HT[idx] != key) { 534 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 535 if (idx == size) { 536 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 537 if (idx == h1) { 538 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 539 } 540 } 541 } 542 #else 543 if (HT[idx] != key) { 544 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 545 if (idx == size) { 546 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 547 if (idx == h1) { 548 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 549 } 550 } 551 } 552 #endif 553 /* A HASH table entry is found, so insert the values at the correct address */ 554 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 555 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 556 } 557 } else { 558 if (!baij->donotstash) { 559 if (roworiented) { 560 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 561 } else { 562 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 563 } 564 } 565 } 566 } 567 #if defined(PETSC_USE_DEBUG) 568 baij->ht_total_ct = total_ct; 569 baij->ht_insert_ct = insert_ct; 570 #endif 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 576 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 577 { 578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 579 PetscTruth roworiented = baij->roworiented; 580 PetscErrorCode ierr; 581 PetscInt i,j,ii,jj,row,col; 582 PetscInt rstart=baij->rstartbs; 583 
PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2; 584 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 585 PetscReal tmp; 586 MatScalar **HD = baij->hd,*baij_a; 587 const MatScalar *v_t,*value; 588 #if defined(PETSC_USE_DEBUG) 589 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 590 #endif 591 592 PetscFunctionBegin; 593 594 if (roworiented) { 595 stepval = (n-1)*bs; 596 } else { 597 stepval = (m-1)*bs; 598 } 599 for (i=0; i<m; i++) { 600 #if defined(PETSC_USE_DEBUG) 601 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 602 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 603 #endif 604 row = im[i]; 605 v_t = v + i*bs2; 606 if (row >= rstart && row < rend) { 607 for (j=0; j<n; j++) { 608 col = in[j]; 609 610 /* Look up into the Hash Table */ 611 key = row*Nbs+col+1; 612 h1 = HASH(size,key,tmp); 613 614 idx = h1; 615 #if defined(PETSC_USE_DEBUG) 616 total_ct++; 617 insert_ct++; 618 if (HT[idx] != key) { 619 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 620 if (idx == size) { 621 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 622 if (idx == h1) { 623 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 624 } 625 } 626 } 627 #else 628 if (HT[idx] != key) { 629 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 630 if (idx == size) { 631 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 632 if (idx == h1) { 633 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 634 } 635 } 636 } 637 #endif 638 baij_a = HD[idx]; 639 if (roworiented) { 640 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 641 /* value = v + (i*(stepval+bs)+j)*bs; */ 642 value = v_t; 643 v_t += bs; 644 if (addv == ADD_VALUES) { 645 for (ii=0; ii<bs; ii++,value+=stepval) { 646 for (jj=ii; jj<bs2; jj+=bs) { 647 baij_a[jj] += *value++; 648 } 649 } 650 } else { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=ii; jj<bs2; jj+=bs) { 653 baij_a[jj] = *value++; 654 } 655 } 656 } 657 } else { 658 value = v + j*(stepval+bs)*bs + i*bs; 659 if (addv == ADD_VALUES) { 660 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 661 for (jj=0; jj<bs; jj++) { 662 baij_a[jj] += *value++; 663 } 664 } 665 } else { 666 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 667 for (jj=0; jj<bs; jj++) { 668 baij_a[jj] = *value++; 669 } 670 } 671 } 672 } 673 } 674 } else { 675 if (!baij->donotstash) { 676 if (roworiented) { 677 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 678 } else { 679 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 680 } 681 } 682 } 683 } 684 #if defined(PETSC_USE_DEBUG) 685 baij->ht_total_ct = total_ct; 686 baij->ht_insert_ct = insert_ct; 687 #endif 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatGetValues_MPIBAIJ" 693 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 694 { 695 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 696 PetscErrorCode ierr; 697 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 698 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 699 700 PetscFunctionBegin; 701 for (i=0; i<m; i++) { 702 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 703 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row 
too large: row %D max %D",idxm[i],mat->rmap.N-1); 704 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 705 row = idxm[i] - bsrstart; 706 for (j=0; j<n; j++) { 707 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 708 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 709 if (idxn[j] >= bscstart && idxn[j] < bscend){ 710 col = idxn[j] - bscstart; 711 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 712 } else { 713 if (!baij->colmap) { 714 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 715 } 716 #if defined (PETSC_USE_CTABLE) 717 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 718 data --; 719 #else 720 data = baij->colmap[idxn[j]/bs]-1; 721 #endif 722 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 723 else { 724 col = data + idxn[j]%bs; 725 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 726 } 727 } 728 } 729 } else { 730 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 731 } 732 } 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatNorm_MPIBAIJ" 738 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 739 { 740 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 741 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 742 PetscErrorCode ierr; 743 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 744 PetscReal sum = 0.0; 745 MatScalar *v; 746 747 PetscFunctionBegin; 748 if (baij->size == 1) { 749 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 750 } else { 751 if (type == NORM_FROBENIUS) { 752 v = amat->a; 753 nz = amat->nz*bs2; 754 for (i=0; i<nz; i++) { 755 #if defined(PETSC_USE_COMPLEX) 756 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 757 #else 758 sum += (*v)*(*v); v++; 759 #endif 760 } 761 v = bmat->a; 762 nz = bmat->nz*bs2; 763 for (i=0; i<nz; i++) { 764 #if defined(PETSC_USE_COMPLEX) 765 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 766 #else 767 sum += (*v)*(*v); v++; 768 #endif 769 } 770 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 771 *nrm = sqrt(*nrm); 772 } else if (type == NORM_1) { /* max column sum */ 773 PetscReal *tmp,*tmp2; 774 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 775 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 776 tmp2 = tmp + mat->cmap.N; 777 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 778 v = amat->a; jj = amat->j; 779 for (i=0; i<amat->nz; i++) { 780 for (j=0; j<bs; j++){ 781 col = bs*(cstart + *jj) + j; /* column index */ 782 for (row=0; row<bs; row++){ 783 tmp[col] += PetscAbsScalar(*v); v++; 784 } 785 } 786 jj++; 787 } 788 v = bmat->a; jj = bmat->j; 789 for (i=0; i<bmat->nz; i++) { 790 for (j=0; j<bs; j++){ 791 col = bs*garray[*jj] + j; 792 for (row=0; row<bs; row++){ 793 tmp[col] += PetscAbsScalar(*v); v++; 794 } 795 } 796 jj++; 797 } 798 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 799 *nrm = 0.0; 800 for (j=0; j<mat->cmap.N; j++) { 801 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 802 } 803 ierr = PetscFree(tmp);CHKERRQ(ierr); 804 } else if (type == NORM_INFINITY) { /* max row sum */ 805 PetscReal *sums; 806 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 807 sum = 0.0; 808 for (j=0; j<amat->mbs; j++) { 809 for (row=0; row<bs; row++) sums[row] = 0.0; 810 v = amat->a + bs2*amat->i[j]; 811 nz = 
amat->i[j+1]-amat->i[j]; 812 for (i=0; i<nz; i++) { 813 for (col=0; col<bs; col++){ 814 for (row=0; row<bs; row++){ 815 sums[row] += PetscAbsScalar(*v); v++; 816 } 817 } 818 } 819 v = bmat->a + bs2*bmat->i[j]; 820 nz = bmat->i[j+1]-bmat->i[j]; 821 for (i=0; i<nz; i++) { 822 for (col=0; col<bs; col++){ 823 for (row=0; row<bs; row++){ 824 sums[row] += PetscAbsScalar(*v); v++; 825 } 826 } 827 } 828 for (row=0; row<bs; row++){ 829 if (sums[row] > sum) sum = sums[row]; 830 } 831 } 832 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 833 ierr = PetscFree(sums);CHKERRQ(ierr); 834 } else { 835 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 836 } 837 } 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 Creates the hash table, and sets the table 843 This table is created only once. 844 If new entried need to be added to the matrix 845 then the hash table has to be destroyed and 846 recreated. 847 */ 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851 { 852 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853 Mat A = baij->A,B=baij->B; 854 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856 PetscErrorCode ierr; 857 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 858 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 859 PetscInt *HT,key; 860 MatScalar **HD; 861 PetscReal tmp; 862 #if defined(PETSC_USE_INFO) 863 PetscInt ct=0,max=0; 864 #endif 865 866 PetscFunctionBegin; 867 baij->ht_size=(PetscInt)(factor*nz); 868 size = baij->ht_size; 869 870 if (baij->ht) { 871 PetscFunctionReturn(0); 872 } 873 874 /* Allocate Memory for Hash Table */ 875 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 876 baij->ht = (PetscInt*)(baij->hd + size); 877 HD = baij->hd; 878 HT = baij->ht; 879 880 881 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 882 883 884 /* Loop Over A */ 885 for (i=0; i<a->mbs; i++) { 886 for (j=ai[i]; j<ai[i+1]; j++) { 887 row = i+rstart; 888 col = aj[j]+cstart; 889 890 key = row*Nbs + col + 1; 891 h1 = HASH(size,key,tmp); 892 for (k=0; k<size; k++){ 893 if (!HT[(h1+k)%size]) { 894 HT[(h1+k)%size] = key; 895 HD[(h1+k)%size] = a->a + j*bs2; 896 break; 897 #if defined(PETSC_USE_INFO) 898 } else { 899 ct++; 900 #endif 901 } 902 } 903 #if defined(PETSC_USE_INFO) 904 if (k> max) max = k; 905 #endif 906 } 907 } 908 /* Loop Over B */ 909 for (i=0; i<b->mbs; i++) { 910 for (j=bi[i]; j<bi[i+1]; j++) { 911 row = i+rstart; 912 col = garray[bj[j]]; 913 key = row*Nbs + col + 1; 914 h1 = HASH(size,key,tmp); 915 for (k=0; k<size; k++){ 916 if (!HT[(h1+k)%size]) { 917 HT[(h1+k)%size] = key; 918 HD[(h1+k)%size] = b->a + j*bs2; 919 break; 920 #if defined(PETSC_USE_INFO) 921 } else { 922 ct++; 923 #endif 924 } 925 } 926 #if defined(PETSC_USE_INFO) 927 if (k> max) max = k; 928 #endif 929 } 930 } 931 932 /* Print Summary */ 933 #if defined(PETSC_USE_INFO) 934 for (i=0,j=0; i<size; i++) { 935 if (HT[i]) {j++;} 936 } 937 ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 938 #endif 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 944 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 945 { 946 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 947 PetscErrorCode ierr; 948 PetscInt nstash,reallocs; 949 InsertMode addv; 950 951 PetscFunctionBegin; 952 if (baij->donotstash) { 953 PetscFunctionReturn(0); 954 } 955 956 /* make sure all processors are either in INSERTMODE or ADDMODE */ 957 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 958 if (addv == (ADD_VALUES|INSERT_VALUES)) { 959 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 960 } 961 mat->insertmode = addv; /* in case this processor had no cache */ 962 963 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 964 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 965 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 966 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 967 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 968 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 974 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 975 { 976 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 977 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 978 PetscErrorCode ierr; 979 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 980 PetscInt *row,*col,other_disassembled; 981 PetscTruth r1,r2,r3; 982 MatScalar *val; 983 InsertMode addv = mat->insertmode; 984 PetscMPIInt n; 985 986 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 987 PetscFunctionBegin; 988 if (!baij->donotstash) { 989 while (1) { 990 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 991 if (!flg) break; 992 993 for (i=0; i<n;) { 994 /* Now identify the consecutive vals belonging to the same row */ 995 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 996 if (j < n) ncols = j-i; 997 else ncols = n-i; 998 /* Now assemble all these values with a single function call */ 999 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1000 i = j; 1001 } 1002 } 1003 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1004 /* Now process the block-stash. 
Since the values are stashed column-oriented, 1005 set the roworiented flag to column oriented, and after MatSetValues() 1006 restore the original flags */ 1007 r1 = baij->roworiented; 1008 r2 = a->roworiented; 1009 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 1010 baij->roworiented = PETSC_FALSE; 1011 a->roworiented = PETSC_FALSE; 1012 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 1013 while (1) { 1014 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1015 if (!flg) break; 1016 1017 for (i=0; i<n;) { 1018 /* Now identify the consecutive vals belonging to the same row */ 1019 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1020 if (j < n) ncols = j-i; 1021 else ncols = n-i; 1022 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1023 i = j; 1024 } 1025 } 1026 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1027 baij->roworiented = r1; 1028 a->roworiented = r2; 1029 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 1030 } 1031 1032 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1033 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1034 1035 /* determine if any processor has disassembled, if so we must 1036 also disassemble ourselfs, in order that we may reassemble. */ 1037 /* 1038 if nonzero structure of submatrix B cannot change then we know that 1039 no processor disassembled thus we can skip this stuff 1040 */ 1041 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1042 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1043 if (mat->was_assembled && !other_disassembled) { 1044 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1045 } 1046 } 1047 1048 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1049 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1050 } 1051 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 1052 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1053 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1054 1055 #if defined(PETSC_USE_INFO) 1056 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1057 ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 1058 baij->ht_total_ct = 0; 1059 baij->ht_insert_ct = 0; 1060 } 1061 #endif 1062 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1063 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1064 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1065 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1066 } 1067 1068 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1069 baij->rowvalues = 0; 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1075 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1076 { 1077 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1078 PetscErrorCode ierr; 1079 PetscMPIInt size = baij->size,rank = baij->rank; 1080 PetscInt bs = mat->rmap.bs; 1081 PetscTruth iascii,isdraw; 1082 PetscViewer sviewer; 1083 PetscViewerFormat format; 1084 1085 PetscFunctionBegin; 1086 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1087 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1088 if (iascii) { 1089 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1090 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1091 MatInfo info; 1092 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1093 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1095 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1096 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1097 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1098 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1099 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1100 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1101 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1102 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1105 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1108 PetscFunctionReturn(0); 1109 } 1110 } 1111 1112 if (isdraw) { 1113 PetscDraw draw; 1114 PetscTruth isnull; 1115 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1116 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1117 } 1118 1119 if (size == 1) { 1120 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1121 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1122 } else { 1123 /* assemble the entire matrix onto first processor. */ 1124 Mat A; 1125 Mat_SeqBAIJ *Aloc; 1126 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1127 MatScalar *a; 1128 1129 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1130 /* Perhaps this should be the type of mat? 
*/ 1131 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 1132 if (!rank) { 1133 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1134 } else { 1135 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1136 } 1137 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1138 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1139 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1140 1141 /* copy over the A part */ 1142 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1143 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1144 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1145 1146 for (i=0; i<mbs; i++) { 1147 rvals[0] = bs*(baij->rstartbs + i); 1148 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1149 for (j=ai[i]; j<ai[i+1]; j++) { 1150 col = (baij->cstartbs+aj[j])*bs; 1151 for (k=0; k<bs; k++) { 1152 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1153 col++; a += bs; 1154 } 1155 } 1156 } 1157 /* copy over the B part */ 1158 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1159 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1160 for (i=0; i<mbs; i++) { 1161 rvals[0] = bs*(baij->rstartbs + i); 1162 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1163 for (j=ai[i]; j<ai[i+1]; j++) { 1164 col = baij->garray[aj[j]]*bs; 1165 for (k=0; k<bs; k++) { 1166 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1167 col++; a += bs; 1168 } 1169 } 1170 } 1171 ierr = PetscFree(rvals);CHKERRQ(ierr); 1172 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1173 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1174 /* 1175 Everyone has to call to draw the matrix since the graphics waits are 1176 synchronized across all processors that share the PetscDraw object 1177 */ 1178 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1179 if (!rank) { 1180 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1181 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1182 } 1183 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1184 ierr = MatDestroy(A);CHKERRQ(ierr); 1185 } 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatView_MPIBAIJ" 1191 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1192 { 1193 PetscErrorCode ierr; 1194 PetscTruth iascii,isdraw,issocket,isbinary; 1195 1196 PetscFunctionBegin; 1197 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1201 if (iascii || isdraw || issocket || isbinary) { 1202 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1203 } else { 1204 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1211 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1212 { 1213 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1214 PetscErrorCode ierr; 1215 1216 PetscFunctionBegin; 1217 #if defined(PETSC_USE_LOG) 1218 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 1219 #endif 1220 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1221 ierr = 
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1222 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1223 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1224 #if defined (PETSC_USE_CTABLE) 1225 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1226 #else 1227 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1228 #endif 1229 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1230 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1231 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1232 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1233 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1234 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1235 #if defined(PETSC_USE_MAT_SINGLE) 1236 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1237 #endif 1238 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1239 ierr = PetscFree(baij);CHKERRQ(ierr); 1240 1241 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatMult_MPIBAIJ" 1253 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1254 { 1255 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1256 PetscErrorCode ierr; 1257 PetscInt nt; 1258 1259 PetscFunctionBegin; 1260 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1261 if (nt != A->cmap.n) { 1262 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1263 } 1264 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1265 if (nt != A->rmap.n) { 1266 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1267 } 1268 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1269 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1270 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1271 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1277 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1278 { 1279 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1280 PetscErrorCode ierr; 1281 1282 PetscFunctionBegin; 1283 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1284 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1285 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1286 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1292 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1293 { 1294 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1295 PetscErrorCode ierr; 1296 PetscTruth merged; 1297 1298 PetscFunctionBegin; 1299 ierr = 
VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1300 /* do nondiagonal part */ 1301 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1302 if (!merged) { 1303 /* send it on its way */ 1304 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1305 /* do local part */ 1306 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1307 /* receive remote parts: note this assumes the values are not actually */ 1308 /* inserted in yy until the next line */ 1309 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1310 } else { 1311 /* do local part */ 1312 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1313 /* send it on its way */ 1314 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1315 /* values actually were received in the Begin() but we need to call this nop */ 1316 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1317 } 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1323 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1324 { 1325 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1326 PetscErrorCode ierr; 1327 1328 PetscFunctionBegin; 1329 /* do nondiagonal part */ 1330 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1331 /* send it on its way */ 1332 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1333 /* do local part */ 1334 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1335 /* receive remote parts: note this assumes the values are not actually */ 1336 /* inserted in yy until the next line, which is true for my implementation*/ 1337 /* but is not perhaps always true. 
*/ 1338 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 /* 1343 This only works correctly for square matrices where the subblock A->A is the 1344 diagonal block 1345 */ 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1348 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1349 { 1350 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1355 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatScale_MPIBAIJ" 1361 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1362 { 1363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1368 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1374 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1375 { 1376 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1377 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1378 PetscErrorCode ierr; 1379 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1380 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1381 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1382 1383 PetscFunctionBegin; 1384 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1385 mat->getrowactive = PETSC_TRUE; 1386 1387 if (!mat->rowvalues && (idx || v)) { 1388 /* 1389 allocate enough space to hold information from the longest row. 
1390 */ 1391 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1392 PetscInt max = 1,mbs = mat->mbs,tmp; 1393 for (i=0; i<mbs; i++) { 1394 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1395 if (max < tmp) { max = tmp; } 1396 } 1397 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1398 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1399 } 1400 1401 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1402 lrow = row - brstart; 1403 1404 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1405 if (!v) {pvA = 0; pvB = 0;} 1406 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1407 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1408 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1409 nztot = nzA + nzB; 1410 1411 cmap = mat->garray; 1412 if (v || idx) { 1413 if (nztot) { 1414 /* Sort by increasing column numbers, assuming A and B already sorted */ 1415 PetscInt imark = -1; 1416 if (v) { 1417 *v = v_p = mat->rowvalues; 1418 for (i=0; i<nzB; i++) { 1419 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1420 else break; 1421 } 1422 imark = i; 1423 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1424 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1425 } 1426 if (idx) { 1427 *idx = idx_p = mat->rowindices; 1428 if (imark > -1) { 1429 for (i=0; i<imark; i++) { 1430 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1431 } 1432 } else { 1433 for (i=0; i<nzB; i++) { 1434 if (cmap[cworkB[i]/bs] < cstart) 1435 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1436 else break; 1437 } 1438 imark = i; 1439 } 1440 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1441 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1442 } 1443 } else { 1444 if (idx) *idx = 0; 1445 if (v) *v = 0; 1446 } 1447 } 1448 *nz = nztot; 1449 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1456 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1457 { 1458 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1459 1460 PetscFunctionBegin; 1461 if (!baij->getrowactive) { 1462 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1463 } 1464 baij->getrowactive = PETSC_FALSE; 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1470 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1471 { 1472 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1473 PetscErrorCode ierr; 1474 1475 PetscFunctionBegin; 1476 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1477 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1483 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1484 { 1485 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1486 Mat A = a->A,B = a->B; 1487 PetscErrorCode ierr; 1488 PetscReal isend[5],irecv[5]; 1489 1490 PetscFunctionBegin; 1491 info->block_size = (PetscReal)matin->rmap.bs; 1492 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1493 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1494 isend[3] = info->memory; isend[4] = info->mallocs; 1495 ierr = 
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1496 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1497 isend[3] += info->memory; isend[4] += info->mallocs; 1498 if (flag == MAT_LOCAL) { 1499 info->nz_used = isend[0]; 1500 info->nz_allocated = isend[1]; 1501 info->nz_unneeded = isend[2]; 1502 info->memory = isend[3]; 1503 info->mallocs = isend[4]; 1504 } else if (flag == MAT_GLOBAL_MAX) { 1505 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1506 info->nz_used = irecv[0]; 1507 info->nz_allocated = irecv[1]; 1508 info->nz_unneeded = irecv[2]; 1509 info->memory = irecv[3]; 1510 info->mallocs = irecv[4]; 1511 } else if (flag == MAT_GLOBAL_SUM) { 1512 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1513 info->nz_used = irecv[0]; 1514 info->nz_allocated = irecv[1]; 1515 info->nz_unneeded = irecv[2]; 1516 info->memory = irecv[3]; 1517 info->mallocs = irecv[4]; 1518 } else { 1519 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1520 } 1521 info->rows_global = (PetscReal)A->rmap.N; 1522 info->columns_global = (PetscReal)A->cmap.N; 1523 info->rows_local = (PetscReal)A->rmap.N; 1524 info->columns_local = (PetscReal)A->cmap.N; 1525 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1526 info->fill_ratio_needed = 0; 1527 info->factor_mallocs = 0; 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1533 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1534 { 1535 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1536 PetscErrorCode ierr; 1537 1538 PetscFunctionBegin; 1539 switch (op) { 1540 case MAT_NO_NEW_NONZERO_LOCATIONS: 1541 case MAT_YES_NEW_NONZERO_LOCATIONS: 1542 case MAT_COLUMNS_UNSORTED: 1543 case MAT_COLUMNS_SORTED: 1544 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1545 case MAT_KEEP_ZEROED_ROWS: 1546 case MAT_NEW_NONZERO_LOCATION_ERR: 1547 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1548 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1549 break; 1550 case MAT_ROW_ORIENTED: 1551 a->roworiented = PETSC_TRUE; 1552 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1553 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1554 break; 1555 case MAT_ROWS_SORTED: 1556 case MAT_ROWS_UNSORTED: 1557 case MAT_YES_NEW_DIAGONALS: 1558 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1559 break; 1560 case MAT_COLUMN_ORIENTED: 1561 a->roworiented = PETSC_FALSE; 1562 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1563 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1564 break; 1565 case MAT_IGNORE_OFF_PROC_ENTRIES: 1566 a->donotstash = PETSC_TRUE; 1567 break; 1568 case MAT_NO_NEW_DIAGONALS: 1569 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1570 case MAT_USE_HASH_TABLE: 1571 a->ht_flag = PETSC_TRUE; 1572 break; 1573 case MAT_SYMMETRIC: 1574 case MAT_STRUCTURALLY_SYMMETRIC: 1575 case MAT_HERMITIAN: 1576 case MAT_SYMMETRY_ETERNAL: 1577 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1578 break; 1579 case MAT_NOT_SYMMETRIC: 1580 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1581 case MAT_NOT_HERMITIAN: 1582 case MAT_NOT_SYMMETRY_ETERNAL: 1583 break; 1584 default: 1585 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1592 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1593 { 1594 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1595 Mat_SeqBAIJ *Aloc; 1596 Mat B; 1597 PetscErrorCode ierr; 1598 PetscInt 
M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1599 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1600 MatScalar *a; 1601 1602 PetscFunctionBegin; 1603 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1604 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1605 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1606 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1607 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1608 1609 /* copy over the A part */ 1610 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1611 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1612 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1613 1614 for (i=0; i<mbs; i++) { 1615 rvals[0] = bs*(baij->rstartbs + i); 1616 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1617 for (j=ai[i]; j<ai[i+1]; j++) { 1618 col = (baij->cstartbs+aj[j])*bs; 1619 for (k=0; k<bs; k++) { 1620 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1621 col++; a += bs; 1622 } 1623 } 1624 } 1625 /* copy over the B part */ 1626 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1627 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1628 for (i=0; i<mbs; i++) { 1629 rvals[0] = bs*(baij->rstartbs + i); 1630 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1631 for (j=ai[i]; j<ai[i+1]; j++) { 1632 col = baij->garray[aj[j]]*bs; 1633 for (k=0; k<bs; k++) { 1634 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1635 col++; a += bs; 1636 } 1637 } 1638 } 1639 ierr = PetscFree(rvals);CHKERRQ(ierr); 1640 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 1643 if (matout) { 1644 *matout = B; 1645 } else { 1646 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1653 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1654 { 1655 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1656 Mat a = baij->A,b = baij->B; 1657 PetscErrorCode ierr; 1658 PetscInt s1,s2,s3; 1659 1660 PetscFunctionBegin; 1661 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1662 if (rr) { 1663 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1664 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1665 /* Overlap communication with computation. 
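       The scatter of rr into baij->lvec is started here; the diagonal block (and, when ll is
       given, the left scaling of the off-diagonal block) is applied while that scatter is in
       flight, and the right scaling of the off-diagonal block is applied only after the
       VecScatterEnd() below.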
*/ 1666 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1667 } 1668 if (ll) { 1669 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1670 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1671 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1672 } 1673 /* scale the diagonal block */ 1674 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1675 1676 if (rr) { 1677 /* Do a scatter end and then right scale the off-diagonal block */ 1678 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1679 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1680 } 1681 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1687 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1688 { 1689 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1690 PetscErrorCode ierr; 1691 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1692 PetscInt i,*owners = A->rmap.range; 1693 PetscInt *nprocs,j,idx,nsends,row; 1694 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1695 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1696 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1697 MPI_Comm comm = A->comm; 1698 MPI_Request *send_waits,*recv_waits; 1699 MPI_Status recv_status,*send_status; 1700 #if defined(PETSC_DEBUG) 1701 PetscTruth found = PETSC_FALSE; 1702 #endif 1703 1704 PetscFunctionBegin; 1705 /* first count number of contributors to each processor */ 1706 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1707 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1708 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1709 j = 0; 1710 for (i=0; i<N; i++) { 1711 if (lastidx > (idx = rows[i])) j = 0; 1712 lastidx = idx; 1713 for (; j<size; j++) { 1714 if (idx >= owners[j] && idx < owners[j+1]) { 1715 nprocs[2*j]++; 1716 nprocs[2*j+1] = 1; 1717 owner[i] = j; 1718 #if defined(PETSC_DEBUG) 1719 found = PETSC_TRUE; 1720 #endif 1721 break; 1722 } 1723 } 1724 #if defined(PETSC_DEBUG) 1725 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1726 found = PETSC_FALSE; 1727 #endif 1728 } 1729 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1730 1731 /* inform other processors of number of messages and max length*/ 1732 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1733 1734 /* post receives: */ 1735 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1736 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1737 for (i=0; i<nrecvs; i++) { 1738 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1739 } 1740 1741 /* do sends: 1742 1) starts[i] gives the starting index in svalues for stuff going to 1743 the ith processor 1744 */ 1745 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1746 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1747 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1748 starts[0] = 0; 1749 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1750 for (i=0; i<N; i++) { 1751 svalues[starts[owner[i]]++] = rows[i]; 1752 } 1753 1754 starts[0] = 0; 1755 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1756 count = 0; 1757 for (i=0; i<size; i++) { 1758 if 
(nprocs[2*i+1]) { 1759 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1760 } 1761 } 1762 ierr = PetscFree(starts);CHKERRQ(ierr); 1763 1764 base = owners[rank]; 1765 1766 /* wait on receives */ 1767 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1768 source = lens + nrecvs; 1769 count = nrecvs; slen = 0; 1770 while (count) { 1771 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1772 /* unpack receives into our local space */ 1773 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1774 source[imdex] = recv_status.MPI_SOURCE; 1775 lens[imdex] = n; 1776 slen += n; 1777 count--; 1778 } 1779 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1780 1781 /* move the data into the send scatter */ 1782 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1783 count = 0; 1784 for (i=0; i<nrecvs; i++) { 1785 values = rvalues + i*nmax; 1786 for (j=0; j<lens[i]; j++) { 1787 lrows[count++] = values[j] - base; 1788 } 1789 } 1790 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1791 ierr = PetscFree(lens);CHKERRQ(ierr); 1792 ierr = PetscFree(owner);CHKERRQ(ierr); 1793 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1794 1795 /* actually zap the local rows */ 1796 /* 1797 Zero the required rows. If the "diagonal block" of the matrix 1798 is square and the user wishes to set the diagonal we use separate 1799 code so that MatSetValues() is not called for each diagonal allocating 1800 new memory, thus calling lots of mallocs and slowing things down. 1801 1802 Contributed by: Matthew Knepley 1803 */ 1804 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1805 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1806 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1807 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1808 } else if (diag != 0.0) { 1809 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1810 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1811 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1812 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1813 } 1814 for (i=0; i<slen; i++) { 1815 row = lrows[i] + rstart_bs; 1816 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1817 } 1818 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820 } else { 1821 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1822 } 1823 1824 ierr = PetscFree(lrows);CHKERRQ(ierr); 1825 1826 /* wait on sends */ 1827 if (nsends) { 1828 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1829 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1830 ierr = PetscFree(send_status);CHKERRQ(ierr); 1831 } 1832 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1833 ierr = PetscFree(svalues);CHKERRQ(ierr); 1834 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1840 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1841 { 1842 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1843 MPI_Comm comm = A->comm; 1844 static PetscTruth called = PETSC_FALSE; 1845 PetscErrorCode ierr; 1846 1847 PetscFunctionBegin; 1848 if (!a->rank) { 1849 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1850 } 1851 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1852 ierr = (*PetscHelpPrintf)(comm," 
Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1853 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1859 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1860 { 1861 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1866 PetscFunctionReturn(0); 1867 } 1868 1869 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatEqual_MPIBAIJ" 1873 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1874 { 1875 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1876 Mat a,b,c,d; 1877 PetscTruth flg; 1878 PetscErrorCode ierr; 1879 1880 PetscFunctionBegin; 1881 a = matA->A; b = matA->B; 1882 c = matB->A; d = matB->B; 1883 1884 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1885 if (flg) { 1886 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1887 } 1888 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatCopy_MPIBAIJ" 1894 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1895 { 1896 PetscErrorCode ierr; 1897 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1898 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1899 1900 PetscFunctionBegin; 1901 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1902 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1903 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1904 } else { 1905 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1906 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1907 } 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1913 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1914 { 1915 PetscErrorCode ierr; 1916 1917 PetscFunctionBegin; 1918 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #include "petscblaslapack.h" 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1925 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1926 { 1927 PetscErrorCode ierr; 1928 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1929 PetscBLASInt bnz,one=1; 1930 Mat_SeqBAIJ *x,*y; 1931 1932 PetscFunctionBegin; 1933 if (str == SAME_NONZERO_PATTERN) { 1934 PetscScalar alpha = a; 1935 x = (Mat_SeqBAIJ *)xx->A->data; 1936 y = (Mat_SeqBAIJ *)yy->A->data; 1937 bnz = (PetscBLASInt)x->nz; 1938 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1939 x = (Mat_SeqBAIJ *)xx->B->data; 1940 y = (Mat_SeqBAIJ *)yy->B->data; 1941 bnz = (PetscBLASInt)x->nz; 1942 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1943 } else { 1944 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1951 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1952 { 1953 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1958 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ 
"MatImaginaryPart_MPIBAIJ" 1964 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1965 { 1966 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1967 PetscErrorCode ierr; 1968 1969 PetscFunctionBegin; 1970 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1971 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1972 PetscFunctionReturn(0); 1973 } 1974 1975 /* -------------------------------------------------------------------*/ 1976 static struct _MatOps MatOps_Values = { 1977 MatSetValues_MPIBAIJ, 1978 MatGetRow_MPIBAIJ, 1979 MatRestoreRow_MPIBAIJ, 1980 MatMult_MPIBAIJ, 1981 /* 4*/ MatMultAdd_MPIBAIJ, 1982 MatMultTranspose_MPIBAIJ, 1983 MatMultTransposeAdd_MPIBAIJ, 1984 0, 1985 0, 1986 0, 1987 /*10*/ 0, 1988 0, 1989 0, 1990 0, 1991 MatTranspose_MPIBAIJ, 1992 /*15*/ MatGetInfo_MPIBAIJ, 1993 MatEqual_MPIBAIJ, 1994 MatGetDiagonal_MPIBAIJ, 1995 MatDiagonalScale_MPIBAIJ, 1996 MatNorm_MPIBAIJ, 1997 /*20*/ MatAssemblyBegin_MPIBAIJ, 1998 MatAssemblyEnd_MPIBAIJ, 1999 0, 2000 MatSetOption_MPIBAIJ, 2001 MatZeroEntries_MPIBAIJ, 2002 /*25*/ MatZeroRows_MPIBAIJ, 2003 0, 2004 0, 2005 0, 2006 0, 2007 /*30*/ MatSetUpPreallocation_MPIBAIJ, 2008 0, 2009 0, 2010 0, 2011 0, 2012 /*35*/ MatDuplicate_MPIBAIJ, 2013 0, 2014 0, 2015 0, 2016 0, 2017 /*40*/ MatAXPY_MPIBAIJ, 2018 MatGetSubMatrices_MPIBAIJ, 2019 MatIncreaseOverlap_MPIBAIJ, 2020 MatGetValues_MPIBAIJ, 2021 MatCopy_MPIBAIJ, 2022 /*45*/ MatPrintHelp_MPIBAIJ, 2023 MatScale_MPIBAIJ, 2024 0, 2025 0, 2026 0, 2027 /*50*/ 0, 2028 0, 2029 0, 2030 0, 2031 0, 2032 /*55*/ 0, 2033 0, 2034 MatSetUnfactored_MPIBAIJ, 2035 0, 2036 MatSetValuesBlocked_MPIBAIJ, 2037 /*60*/ 0, 2038 MatDestroy_MPIBAIJ, 2039 MatView_MPIBAIJ, 2040 0, 2041 0, 2042 /*65*/ 0, 2043 0, 2044 0, 2045 0, 2046 0, 2047 /*70*/ MatGetRowMax_MPIBAIJ, 2048 0, 2049 0, 2050 0, 2051 0, 2052 /*75*/ 0, 2053 0, 2054 0, 2055 0, 2056 0, 2057 /*80*/ 0, 2058 0, 2059 0, 2060 0, 2061 MatLoad_MPIBAIJ, 2062 /*85*/ 0, 2063 0, 2064 0, 2065 0, 2066 0, 2067 /*90*/ 0, 2068 0, 2069 0, 2070 0, 2071 0, 2072 /*95*/ 0, 2073 0, 2074 0, 2075 0, 2076 0, 2077 /*100*/0, 2078 0, 2079 0, 2080 0, 2081 0, 2082 /*105*/0, 2083 MatRealPart_MPIBAIJ, 2084 MatImaginaryPart_MPIBAIJ}; 2085 2086 2087 EXTERN_C_BEGIN 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2090 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2091 { 2092 PetscFunctionBegin; 2093 *a = ((Mat_MPIBAIJ *)A->data)->A; 2094 *iscopy = PETSC_FALSE; 2095 PetscFunctionReturn(0); 2096 } 2097 EXTERN_C_END 2098 2099 EXTERN_C_BEGIN 2100 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2101 EXTERN_C_END 2102 2103 #undef __FUNCT__ 2104 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2105 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2106 { 2107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2108 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2109 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2110 const PetscInt *JJ; 2111 PetscScalar *values; 2112 PetscErrorCode ierr; 2113 2114 PetscFunctionBegin; 2115 #if defined(PETSC_OPT_g) 2116 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2117 #endif 2118 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2119 o_nnz = d_nnz + m; 2120 2121 for (i=0; i<m; i++) { 2122 nnz = Ii[i+1]- Ii[i]; 2123 JJ = J + Ii[i]; 2124 nnz_max = PetscMax(nnz_max,nnz); 2125 #if 
defined(PETSC_OPT_g) 2126 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2127 #endif 2128 for (j=0; j<nnz; j++) { 2129 if (*JJ >= cstart) break; 2130 JJ++; 2131 } 2132 d = 0; 2133 for (; j<nnz; j++) { 2134 if (*JJ++ >= cend) break; 2135 d++; 2136 } 2137 d_nnz[i] = d; 2138 o_nnz[i] = nnz - d; 2139 } 2140 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2141 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2142 2143 if (v) values = (PetscScalar*)v; 2144 else { 2145 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2146 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2147 } 2148 2149 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2150 for (i=0; i<m; i++) { 2151 ii = i + rstart; 2152 nnz = Ii[i+1]- Ii[i]; 2153 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2154 } 2155 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2158 2159 if (!v) { 2160 ierr = PetscFree(values);CHKERRQ(ierr); 2161 } 2162 PetscFunctionReturn(0); 2163 } 2164 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2167 /*@C 2168 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2169 (the default parallel PETSc format). 2170 2171 Collective on MPI_Comm 2172 2173 Input Parameters: 2174 + A - the matrix 2175 . i - the indices into j for the start of each local row (starts with zero) 2176 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2177 - v - optional values in the matrix 2178 2179 Level: developer 2180 2181 .keywords: matrix, aij, compressed row, sparse, parallel 2182 2183 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2184 @*/ 2185 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2186 { 2187 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2188 2189 PetscFunctionBegin; 2190 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2191 if (f) { 2192 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2193 } 2194 PetscFunctionReturn(0); 2195 } 2196 2197 EXTERN_C_BEGIN 2198 #undef __FUNCT__ 2199 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2200 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2201 { 2202 Mat_MPIBAIJ *b; 2203 PetscErrorCode ierr; 2204 PetscInt i; 2205 2206 PetscFunctionBegin; 2207 B->preallocated = PETSC_TRUE; 2208 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2209 2210 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2211 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2212 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2213 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2214 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2215 2216 B->rmap.bs = bs; 2217 B->cmap.bs = bs; 2218 ierr = 
PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2219 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2220 2221 if (d_nnz) { 2222 for (i=0; i<B->rmap.n/bs; i++) { 2223 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2224 } 2225 } 2226 if (o_nnz) { 2227 for (i=0; i<B->rmap.n/bs; i++) { 2228 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2229 } 2230 } 2231 2232 b = (Mat_MPIBAIJ*)B->data; 2233 b->bs2 = bs*bs; 2234 b->mbs = B->rmap.n/bs; 2235 b->nbs = B->cmap.n/bs; 2236 b->Mbs = B->rmap.N/bs; 2237 b->Nbs = B->cmap.N/bs; 2238 2239 for (i=0; i<=b->size; i++) { 2240 b->rangebs[i] = B->rmap.range[i]/bs; 2241 } 2242 b->rstartbs = B->rmap.rstart/bs; 2243 b->rendbs = B->rmap.rend/bs; 2244 b->cstartbs = B->cmap.rstart/bs; 2245 b->cendbs = B->cmap.rend/bs; 2246 2247 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2248 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2249 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2250 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2251 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2252 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2253 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2254 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2255 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2256 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2257 2258 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2259 2260 PetscFunctionReturn(0); 2261 } 2262 EXTERN_C_END 2263 2264 EXTERN_C_BEGIN 2265 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2266 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2267 EXTERN_C_END 2268 2269 /*MC 2270 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2271 2272 Options Database Keys: 2273 . 
-mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2274 2275 Level: beginner 2276 2277 .seealso: MatCreateMPIBAIJ 2278 M*/ 2279 2280 EXTERN_C_BEGIN 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatCreate_MPIBAIJ" 2283 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2284 { 2285 Mat_MPIBAIJ *b; 2286 PetscErrorCode ierr; 2287 PetscTruth flg; 2288 2289 PetscFunctionBegin; 2290 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2291 B->data = (void*)b; 2292 2293 2294 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2295 B->mapping = 0; 2296 B->factor = 0; 2297 B->assembled = PETSC_FALSE; 2298 2299 B->insertmode = NOT_SET_VALUES; 2300 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2301 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2302 2303 /* build local table of row and column ownerships */ 2304 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2305 2306 /* build cache for off array entries formed */ 2307 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2308 b->donotstash = PETSC_FALSE; 2309 b->colmap = PETSC_NULL; 2310 b->garray = PETSC_NULL; 2311 b->roworiented = PETSC_TRUE; 2312 2313 #if defined(PETSC_USE_MAT_SINGLE) 2314 /* stuff for MatSetValues_XXX in single precision */ 2315 b->setvalueslen = 0; 2316 b->setvaluescopy = PETSC_NULL; 2317 #endif 2318 2319 /* stuff used in block assembly */ 2320 b->barray = 0; 2321 2322 /* stuff used for matrix vector multiply */ 2323 b->lvec = 0; 2324 b->Mvctx = 0; 2325 2326 /* stuff for MatGetRow() */ 2327 b->rowindices = 0; 2328 b->rowvalues = 0; 2329 b->getrowactive = PETSC_FALSE; 2330 2331 /* hash table stuff */ 2332 b->ht = 0; 2333 b->hd = 0; 2334 b->ht_size = 0; 2335 b->ht_flag = PETSC_FALSE; 2336 b->ht_fact = 0; 2337 b->ht_total_ct = 0; 2338 b->ht_insert_ct = 0; 2339 2340 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2341 if (flg) { 2342 PetscReal fact = 1.39; 2343 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2344 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2345 if (fact <= 1.0) fact = 1.39; 2346 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2347 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2348 } 2349 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2350 "MatStoreValues_MPIBAIJ", 2351 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2352 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2353 "MatRetrieveValues_MPIBAIJ", 2354 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2355 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2356 "MatGetDiagonalBlock_MPIBAIJ", 2357 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2358 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2359 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2360 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2361 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2362 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2363 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2364 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2365 "MatDiagonalScaleLocal_MPIBAIJ", 2366 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2367 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2368 "MatSetHashTableFactor_MPIBAIJ", 2369 
                                           MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_BAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance the user should preallocate the
   matrix storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these
   parameters accurately, performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance the user should preallocate
   the matrix storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).  By setting
   these parameters accurately, performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
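   Example usage (an illustrative sketch only; the preallocation counts 3 and 1 are placeholders
   and should be replaced by counts appropriate for the matrix being assembled):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,8,8,PETSC_DETERMINE,PETSC_DETERMINE,
                           3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
   /* ... set entries with MatSetValuesBlocked() or MatSetValues() ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve

   Here each process owns 8 local rows and columns with block size 2 (so 4 block rows per process),
   and 3 nonzero blocks per block row are preallocated for the diagonal portion and 1 for the
   off-diagonal portion.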
2585 2586 Level: intermediate 2587 2588 .keywords: matrix, block, aij, compressed row, sparse, parallel 2589 2590 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2591 @*/ 2592 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2593 { 2594 PetscErrorCode ierr; 2595 PetscMPIInt size; 2596 2597 PetscFunctionBegin; 2598 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2599 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2601 if (size > 1) { 2602 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2603 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2604 } else { 2605 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2606 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2607 } 2608 PetscFunctionReturn(0); 2609 } 2610 2611 #undef __FUNCT__ 2612 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2613 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2614 { 2615 Mat mat; 2616 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2617 PetscErrorCode ierr; 2618 PetscInt len=0; 2619 2620 PetscFunctionBegin; 2621 *newmat = 0; 2622 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2623 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2624 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2625 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2626 2627 mat->factor = matin->factor; 2628 mat->preallocated = PETSC_TRUE; 2629 mat->assembled = PETSC_TRUE; 2630 mat->insertmode = NOT_SET_VALUES; 2631 2632 a = (Mat_MPIBAIJ*)mat->data; 2633 mat->rmap.bs = matin->rmap.bs; 2634 a->bs2 = oldmat->bs2; 2635 a->mbs = oldmat->mbs; 2636 a->nbs = oldmat->nbs; 2637 a->Mbs = oldmat->Mbs; 2638 a->Nbs = oldmat->Nbs; 2639 2640 ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2641 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2642 2643 a->size = oldmat->size; 2644 a->rank = oldmat->rank; 2645 a->donotstash = oldmat->donotstash; 2646 a->roworiented = oldmat->roworiented; 2647 a->rowindices = 0; 2648 a->rowvalues = 0; 2649 a->getrowactive = PETSC_FALSE; 2650 a->barray = 0; 2651 a->rstartbs = oldmat->rstartbs; 2652 a->rendbs = oldmat->rendbs; 2653 a->cstartbs = oldmat->cstartbs; 2654 a->cendbs = oldmat->cendbs; 2655 2656 /* hash table stuff */ 2657 a->ht = 0; 2658 a->hd = 0; 2659 a->ht_size = 0; 2660 a->ht_flag = oldmat->ht_flag; 2661 a->ht_fact = oldmat->ht_fact; 2662 a->ht_total_ct = 0; 2663 a->ht_insert_ct = 0; 2664 2665 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2666 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2667 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2668 if (oldmat->colmap) { 2669 #if defined (PETSC_USE_CTABLE) 2670 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2671 #else 2672 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2673 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2674 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2675 #endif 2676 } else a->colmap = 0; 2677 2678 if (oldmat->garray && (len = 
((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2679 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2680 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2681 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2682 } else a->garray = 0; 2683 2684 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2685 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2686 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2687 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2688 2689 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2690 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2691 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2692 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2693 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2694 *newmat = mat; 2695 2696 PetscFunctionReturn(0); 2697 } 2698 2699 #include "petscsys.h" 2700 2701 #undef __FUNCT__ 2702 #define __FUNCT__ "MatLoad_MPIBAIJ" 2703 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2704 { 2705 Mat A; 2706 PetscErrorCode ierr; 2707 int fd; 2708 PetscInt i,nz,j,rstart,rend; 2709 PetscScalar *vals,*buf; 2710 MPI_Comm comm = ((PetscObject)viewer)->comm; 2711 MPI_Status status; 2712 PetscMPIInt rank,size,maxnz; 2713 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2714 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2715 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2716 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2717 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2718 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2719 2720 PetscFunctionBegin; 2721 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2722 2723 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2724 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2725 if (!rank) { 2726 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2727 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2728 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2729 } 2730 2731 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2732 M = header[1]; N = header[2]; 2733 2734 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2735 2736 /* 2737 This code adds extra rows to make sure the number of rows is 2738 divisible by the blocksize 2739 */ 2740 Mbs = M/bs; 2741 extra_rows = bs - M + bs*Mbs; 2742 if (extra_rows == bs) extra_rows = 0; 2743 else Mbs++; 2744 if (extra_rows && !rank) { 2745 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2746 } 2747 2748 /* determine ownership of all rows */ 2749 mbs = Mbs/size + ((Mbs % size) > rank); 2750 m = mbs*bs; 2751 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2752 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2753 2754 /* process 0 needs enough room for process with most rows */ 2755 if (!rank) { 2756 mmax = rowners[1]; 2757 for (i=2; i<size; i++) { 2758 mmax = PetscMax(mmax,rowners[i]); 2759 } 2760 mmax*=bs; 2761 } else mmax = m; 2762 2763 rowners[0] = 0; 2764 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2765 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2766 rstart = rowners[rank]; 2767 rend = rowners[rank+1]; 2768 
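  /* At this point rowners[] holds the block-row ownership ranges just computed: for example,
     with Mbs = 10 block rows on size = 3 processes the local block counts mbs are 4, 3 and 3,
     so rowners = {0,4,7,10}; browners[] holds the same ranges scaled by the block size bs,
     and [rstart,rend) is the block-row range owned by this process. */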
2769 /* distribute row lengths to all processors */ 2770 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2771 if (!rank) { 2772 mend = m; 2773 if (size == 1) mend = mend - extra_rows; 2774 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2775 for (j=mend; j<m; j++) locrowlens[j] = 1; 2776 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2777 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2778 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2779 for (j=0; j<m; j++) { 2780 procsnz[0] += locrowlens[j]; 2781 } 2782 for (i=1; i<size; i++) { 2783 mend = browners[i+1] - browners[i]; 2784 if (i == size-1) mend = mend - extra_rows; 2785 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2786 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2787 /* calculate the number of nonzeros on each processor */ 2788 for (j=0; j<browners[i+1]-browners[i]; j++) { 2789 procsnz[i] += rowlengths[j]; 2790 } 2791 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2792 } 2793 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2794 } else { 2795 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2796 } 2797 2798 if (!rank) { 2799 /* determine max buffer needed and allocate it */ 2800 maxnz = procsnz[0]; 2801 for (i=1; i<size; i++) { 2802 maxnz = PetscMax(maxnz,procsnz[i]); 2803 } 2804 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2805 2806 /* read in my part of the matrix column indices */ 2807 nz = procsnz[0]; 2808 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2809 mycols = ibuf; 2810 if (size == 1) nz -= extra_rows; 2811 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2812 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2813 2814 /* read in every ones (except the last) and ship off */ 2815 for (i=1; i<size-1; i++) { 2816 nz = procsnz[i]; 2817 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2818 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2819 } 2820 /* read in the stuff for the last proc */ 2821 if (size != 1) { 2822 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2823 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2824 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2825 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2826 } 2827 ierr = PetscFree(cols);CHKERRQ(ierr); 2828 } else { 2829 /* determine buffer space needed for message */ 2830 nz = 0; 2831 for (i=0; i<m; i++) { 2832 nz += locrowlens[i]; 2833 } 2834 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2835 mycols = ibuf; 2836 /* receive message of column indices*/ 2837 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2838 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2839 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2840 } 2841 2842 /* loop over local rows, determining number of off diagonal entries */ 2843 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2844 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2845 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2846 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2847 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2848 rowcount = 0; 
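  /* For each of the mbs local block rows, the loop below scans the column indices of its bs
     point rows, marks each block column the first time it is seen, and counts it in dlens[i]
     if the block column falls in the locally owned block range [rstart,rend) or in odlens[i]
     otherwise; these counts feed the MatMPIBAIJSetPreallocation() call that follows. */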
nzcount = 0; 2849 for (i=0; i<mbs; i++) { 2850 dcount = 0; 2851 odcount = 0; 2852 for (j=0; j<bs; j++) { 2853 kmax = locrowlens[rowcount]; 2854 for (k=0; k<kmax; k++) { 2855 tmp = mycols[nzcount++]/bs; 2856 if (!mask[tmp]) { 2857 mask[tmp] = 1; 2858 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2859 else masked1[dcount++] = tmp; 2860 } 2861 } 2862 rowcount++; 2863 } 2864 2865 dlens[i] = dcount; 2866 odlens[i] = odcount; 2867 2868 /* zero out the mask elements we set */ 2869 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2870 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2871 } 2872 2873 /* create our matrix */ 2874 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2875 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2876 ierr = MatSetType(A,type);CHKERRQ(ierr) 2877 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2878 2879 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2880 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2881 2882 if (!rank) { 2883 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2884 /* read in my part of the matrix numerical values */ 2885 nz = procsnz[0]; 2886 vals = buf; 2887 mycols = ibuf; 2888 if (size == 1) nz -= extra_rows; 2889 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2890 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2891 2892 /* insert into matrix */ 2893 jj = rstart*bs; 2894 for (i=0; i<m; i++) { 2895 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2896 mycols += locrowlens[i]; 2897 vals += locrowlens[i]; 2898 jj++; 2899 } 2900 /* read in other processors (except the last one) and ship out */ 2901 for (i=1; i<size-1; i++) { 2902 nz = procsnz[i]; 2903 vals = buf; 2904 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2905 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2906 } 2907 /* the last proc */ 2908 if (size != 1){ 2909 nz = procsnz[i] - extra_rows; 2910 vals = buf; 2911 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2912 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2913 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2914 } 2915 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2916 } else { 2917 /* receive numeric values */ 2918 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2919 2920 /* receive message of values*/ 2921 vals = buf; 2922 mycols = ibuf; 2923 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2924 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2925 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2926 2927 /* insert into matrix */ 2928 jj = rstart*bs; 2929 for (i=0; i<m; i++) { 2930 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2931 mycols += locrowlens[i]; 2932 vals += locrowlens[i]; 2933 jj++; 2934 } 2935 } 2936 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2937 ierr = PetscFree(buf);CHKERRQ(ierr); 2938 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2939 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2940 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2941 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2942 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2943 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 2945 *newmat = A; 2946 PetscFunctionReturn(0); 2947 } 2948 2949 #undef __FUNCT__ 2950 
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2951 /*@ 2952 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2953 2954 Input Parameters: 2955 . mat - the matrix 2956 . fact - factor 2957 2958 Collective on Mat 2959 2960 Level: advanced 2961 2962 Notes: 2963 This can also be set by the command line option: -mat_use_hash_table fact 2964 2965 .keywords: matrix, hashtable, factor, HT 2966 2967 .seealso: MatSetOption() 2968 @*/ 2969 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2970 { 2971 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2972 2973 PetscFunctionBegin; 2974 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2975 if (f) { 2976 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2977 } 2978 PetscFunctionReturn(0); 2979 } 2980 2981 EXTERN_C_BEGIN 2982 #undef __FUNCT__ 2983 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2984 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2985 { 2986 Mat_MPIBAIJ *baij; 2987 2988 PetscFunctionBegin; 2989 baij = (Mat_MPIBAIJ*)mat->data; 2990 baij->ht_fact = fact; 2991 PetscFunctionReturn(0); 2992 } 2993 EXTERN_C_END 2994 2995 #undef __FUNCT__ 2996 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2997 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2998 { 2999 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3000 PetscFunctionBegin; 3001 *Ad = a->A; 3002 *Ao = a->B; 3003 *colmap = a->garray; 3004 PetscFunctionReturn(0); 3005 } 3006