#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*
   UGLY, ugly, ugly
   When MatScalar == PetscScalar the ..._MatScalar() routines below do not exist as
   separate functions; they are simply #defined to the ordinary routines.  Otherwise
   ..._MatScalar() takes matrix entries that are already in single precision (MatScalar)
   and inserts them into the single precision data structures, while
   MatSetValuesBlocked_MPIBAIJ() first converts the user's PetscScalar entries to single
   precision and then calls ..._MatScalar() to insert them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar    MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar           MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar    MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar        MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] =
A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);} 62 } 63 64 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 65 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 66 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 67 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 68 PetscFunctionReturn(0); 69 } 70 71 EXTERN_C_BEGIN 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 74 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 75 { 76 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 81 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 EXTERN_C_END 85 86 EXTERN_C_BEGIN 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 89 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 90 { 91 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 92 PetscErrorCode ierr; 93 94 PetscFunctionBegin; 95 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 96 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 EXTERN_C_END 100 101 /* 102 Local utility routine that creates a mapping from the global column 103 number to the local number in the off-diagonal part of the local 104 storage of the matrix. This is done in a non scable way since the 105 length of colmap equals the global matrix length. 106 */ 107 #undef __FUNCT__ 108 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 109 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 110 { 111 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 112 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 113 PetscErrorCode ierr; 114 PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 115 116 PetscFunctionBegin; 117 #if defined (PETSC_USE_CTABLE) 118 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 119 for (i=0; i<nbs; i++){ 120 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 121 } 122 #else 123 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 124 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 125 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 126 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 127 #endif 128 PetscFunctionReturn(0); 129 } 130 131 #define CHUNKSIZE 10 132 133 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 134 { \ 135 \ 136 brow = row/bs; \ 137 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 138 rmax = aimax[brow]; nrow = ailen[brow]; \ 139 bcol = col/bs; \ 140 ridx = row % bs; cidx = col % bs; \ 141 low = 0; high = nrow; \ 142 while (high-low > 3) { \ 143 t = (low+high)/2; \ 144 if (rp[t] > bcol) high = t; \ 145 else low = t; \ 146 } \ 147 for (_i=low; _i<high; _i++) { \ 148 if (rp[_i] > bcol) break; \ 149 if (rp[_i] == bcol) { \ 150 bap = ap + bs2*_i + bs*cidx + ridx; \ 151 if (addv == ADD_VALUES) *bap += value; \ 152 else *bap = value; \ 153 goto a_noinsert; \ 154 } \ 155 } \ 156 if (a->nonew == 1) goto a_noinsert; \ 157 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 158 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 159 N = nrow++ - 1; \ 160 /* shift up all the later entries in this row */ \ 161 for (ii=N; ii>=_i; ii--) { \ 162 rp[ii+1] = rp[ii]; \ 163 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 164 } \ 165 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 166 rp[_i] = bcol; \ 167 ap[bs2*_i + bs*cidx + ridx] = value; \ 168 a_noinsert:; \ 169 ailen[brow] = nrow; \ 170 } 171 172 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 173 { \ 174 brow = row/bs; \ 175 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 176 rmax = bimax[brow]; nrow = bilen[brow]; \ 177 bcol = col/bs; \ 178 ridx = row % bs; cidx = col % bs; \ 179 low = 0; high = nrow; \ 180 while (high-low > 3) { \ 181 t = (low+high)/2; \ 182 if (rp[t] > bcol) high = t; \ 183 else low = t; \ 184 } \ 185 for (_i=low; _i<high; _i++) { \ 186 if (rp[_i] > bcol) break; \ 187 if (rp[_i] == bcol) { \ 188 bap = ap + bs2*_i + bs*cidx + ridx; \ 189 if (addv == ADD_VALUES) *bap += value; \ 190 else *bap = value; \ 191 goto b_noinsert; \ 192 } \ 193 } \ 194 if (b->nonew == 1) goto b_noinsert; \ 195 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 196 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 197 CHKMEMQ;\ 198 N = nrow++ - 1; \ 199 /* shift up all the later entries in this row */ \ 200 for (ii=N; ii>=_i; ii--) { \ 201 rp[ii+1] = rp[ii]; \ 202 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 203 } \ 204 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 205 rp[_i] = bcol; \ 206 ap[bs2*_i + bs*cidx + ridx] = value; \ 207 b_noinsert:; \ 208 bilen[brow] = nrow; \ 209 } 210 211 #if defined(PETSC_USE_MAT_SINGLE) 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatSetValues_MPIBAIJ" 214 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 215 { 216 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 217 PetscErrorCode ierr; 218 PetscInt i,N = m*n; 219 MatScalar *vsingle; 220 221 PetscFunctionBegin; 222 if (N > b->setvalueslen) { 223 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 224 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 225 b->setvalueslen = N; 226 } 227 vsingle = b->setvaluescopy; 228 229 for (i=0; i<N; i++) { 230 vsingle[i] = v[i]; 231 } 232 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 238 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 239 { 240 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 241 PetscErrorCode ierr; 242 PetscInt i,N = m*n*b->bs2; 243 MatScalar *vsingle; 244 245 PetscFunctionBegin; 246 if (N > b->setvalueslen) { 247 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 248 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 249 b->setvalueslen = N; 250 } 251 vsingle = b->setvaluescopy; 252 for (i=0; i<N; i++) { 253 vsingle[i] = v[i]; 254 } 255 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 261 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 262 { 263 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 264 PetscErrorCode ierr; 265 PetscInt i,N = m*n; 266 MatScalar *vsingle; 267 268 PetscFunctionBegin; 269 if (N > 
b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
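              /* After DisAssemble_MPIBAIJ() the off-diagonal part B is addressed by
                 global column numbers again, so the global index in[j] is used directly
                 and the cached pointers into B are refreshed below. */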
col = in[j]; 366 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 367 B = baij->B; 368 b = (Mat_SeqBAIJ*)(B)->data; 369 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 370 ba=b->a; 371 } else col += in[j]%bs; 372 } else col = in[j]; 373 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 374 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 375 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 376 } 377 } 378 } else { 379 if (!baij->donotstash) { 380 if (roworiented) { 381 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 382 } else { 383 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 384 } 385 } 386 } 387 } 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 393 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 394 { 395 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 396 const MatScalar *value; 397 MatScalar *barray=baij->barray; 398 PetscTruth roworiented = baij->roworiented; 399 PetscErrorCode ierr; 400 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 401 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 402 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 403 404 PetscFunctionBegin; 405 if(!barray) { 406 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 407 baij->barray = barray; 408 } 409 410 if (roworiented) { 411 stepval = (n-1)*bs; 412 } else { 413 stepval = (m-1)*bs; 414 } 415 for (i=0; i<m; i++) { 416 if (im[i] < 0) continue; 417 #if defined(PETSC_USE_DEBUG) 418 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 419 #endif 420 if (im[i] >= rstart && im[i] < rend) { 421 row = im[i] - rstart; 422 for (j=0; j<n; j++) { 423 /* If NumCol = 1 then a copy is not required */ 424 if ((roworiented) && (n == 1)) { 425 barray = (MatScalar*)v + i*bs2; 426 } else if((!roworiented) && (m == 1)) { 427 barray = (MatScalar*)v + j*bs2; 428 } else { /* Here a copy is required */ 429 if (roworiented) { 430 value = v + i*(stepval+bs)*bs + j*bs; 431 } else { 432 value = v + j*(stepval+bs)*bs + i*bs; 433 } 434 for (ii=0; ii<bs; ii++,value+=stepval) { 435 for (jj=0; jj<bs; jj++) { 436 *barray++ = *value++; 437 } 438 } 439 barray -=bs2; 440 } 441 442 if (in[j] >= cstart && in[j] < cend){ 443 col = in[j] - cstart; 444 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 445 } 446 else if (in[j] < 0) continue; 447 #if defined(PETSC_USE_DEBUG) 448 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 449 #endif 450 else { 451 if (mat->was_assembled) { 452 if (!baij->colmap) { 453 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 454 } 455 456 #if defined(PETSC_USE_DEBUG) 457 #if defined (PETSC_USE_CTABLE) 458 { PetscInt data; 459 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 460 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461 } 462 #else 463 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 464 #endif 465 #endif 466 #if defined (PETSC_USE_CTABLE) 467 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 468 col = (col - 1)/bs; 469 #else 470 col = (baij->colmap[in[j]] - 1)/bs; 471 #endif 472 if (col < 0 && 
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
            col = in[j];
          }
        }
        else col = in[j];
        ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
      }
    }
  } else {
    if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      }
    }
  }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up the block (row/bs, col/bs) in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__
"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 579 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscTruth roworiented = baij->roworiented; 583 PetscErrorCode ierr; 584 PetscInt i,j,ii,jj,row,col; 585 PetscInt rstart=baij->rstartbs; 586 PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2; 587 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 588 PetscReal tmp; 589 MatScalar **HD = baij->hd,*baij_a; 590 const MatScalar *v_t,*value; 591 #if defined(PETSC_USE_DEBUG) 592 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 593 #endif 594 595 PetscFunctionBegin; 596 597 if (roworiented) { 598 stepval = (n-1)*bs; 599 } else { 600 stepval = (m-1)*bs; 601 } 602 for (i=0; i<m; i++) { 603 #if defined(PETSC_USE_DEBUG) 604 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 605 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 606 #endif 607 row = im[i]; 608 v_t = v + i*bs2; 609 if (row >= rstart && row < rend) { 610 for (j=0; j<n; j++) { 611 col = in[j]; 612 613 /* Look up into the Hash Table */ 614 key = row*Nbs+col+1; 615 h1 = HASH(size,key,tmp); 616 617 idx = h1; 618 #if defined(PETSC_USE_DEBUG) 619 total_ct++; 620 insert_ct++; 621 if (HT[idx] != key) { 622 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 623 if (idx == size) { 624 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 625 if (idx == h1) { 626 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 627 } 628 } 629 } 630 #else 631 if (HT[idx] != key) { 632 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 633 if (idx == size) { 634 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 635 if (idx == h1) { 636 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 637 } 638 } 639 } 640 #endif 641 baij_a = HD[idx]; 642 if (roworiented) { 643 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 644 /* value = v + (i*(stepval+bs)+j)*bs; */ 645 value = v_t; 646 v_t += bs; 647 if (addv == ADD_VALUES) { 648 for (ii=0; ii<bs; ii++,value+=stepval) { 649 for (jj=ii; jj<bs2; jj+=bs) { 650 baij_a[jj] += *value++; 651 } 652 } 653 } else { 654 for (ii=0; ii<bs; ii++,value+=stepval) { 655 for (jj=ii; jj<bs2; jj+=bs) { 656 baij_a[jj] = *value++; 657 } 658 } 659 } 660 } else { 661 value = v + j*(stepval+bs)*bs + i*bs; 662 if (addv == ADD_VALUES) { 663 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 664 for (jj=0; jj<bs; jj++) { 665 baij_a[jj] += *value++; 666 } 667 } 668 } else { 669 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 670 for (jj=0; jj<bs; jj++) { 671 baij_a[jj] = *value++; 672 } 673 } 674 } 675 } 676 } 677 } else { 678 if (!baij->donotstash) { 679 if (roworiented) { 680 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 681 } else { 682 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 683 } 684 } 685 } 686 } 687 #if defined(PETSC_USE_DEBUG) 688 baij->ht_total_ct = total_ct; 689 baij->ht_insert_ct = insert_ct; 690 #endif 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetValues_MPIBAIJ" 696 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 697 { 698 Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ*)mat->data; 699 PetscErrorCode ierr; 700 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 701 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 702 703 PetscFunctionBegin; 704 for (i=0; i<m; i++) { 705 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 706 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 707 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 708 row = idxm[i] - bsrstart; 709 for (j=0; j<n; j++) { 710 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 711 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 712 if (idxn[j] >= bscstart && idxn[j] < bscend){ 713 col = idxn[j] - bscstart; 714 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 715 } else { 716 if (!baij->colmap) { 717 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 718 } 719 #if defined (PETSC_USE_CTABLE) 720 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 721 data --; 722 #else 723 data = baij->colmap[idxn[j]/bs]-1; 724 #endif 725 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 726 else { 727 col = data + idxn[j]%bs; 728 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 729 } 730 } 731 } 732 } else { 733 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 734 } 735 } 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatNorm_MPIBAIJ" 741 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 742 { 743 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 744 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 745 PetscErrorCode ierr; 746 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 747 PetscReal sum = 0.0; 748 MatScalar *v; 749 750 PetscFunctionBegin; 751 if (baij->size == 1) { 752 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 753 } else { 754 if (type == NORM_FROBENIUS) { 755 v = amat->a; 756 nz = amat->nz*bs2; 757 for (i=0; i<nz; i++) { 758 #if defined(PETSC_USE_COMPLEX) 759 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 760 #else 761 sum += (*v)*(*v); v++; 762 #endif 763 } 764 v = bmat->a; 765 nz = bmat->nz*bs2; 766 for (i=0; i<nz; i++) { 767 #if defined(PETSC_USE_COMPLEX) 768 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 769 #else 770 sum += (*v)*(*v); v++; 771 #endif 772 } 773 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 774 *nrm = sqrt(*nrm); 775 } else if (type == NORM_1) { /* max column sum */ 776 PetscReal *tmp,*tmp2; 777 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 778 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 779 tmp2 = tmp + mat->cmap.N; 780 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 781 v = amat->a; jj = amat->j; 782 for (i=0; i<amat->nz; i++) { 783 for (j=0; j<bs; j++){ 784 col = bs*(cstart + *jj) + j; /* column index */ 785 for (row=0; row<bs; row++){ 786 tmp[col] += PetscAbsScalar(*v); v++; 787 } 788 } 789 jj++; 790 } 791 v = bmat->a; jj = bmat->j; 792 for (i=0; i<bmat->nz; i++) { 793 for (j=0; j<bs; j++){ 794 col = bs*garray[*jj] + j; 795 for (row=0; row<bs; row++){ 796 tmp[col] += PetscAbsScalar(*v); v++; 797 } 798 } 799 jj++; 800 } 801 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 802 *nrm = 
0.0; 803 for (j=0; j<mat->cmap.N; j++) { 804 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 805 } 806 ierr = PetscFree(tmp);CHKERRQ(ierr); 807 } else if (type == NORM_INFINITY) { /* max row sum */ 808 PetscReal *sums; 809 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 810 sum = 0.0; 811 for (j=0; j<amat->mbs; j++) { 812 for (row=0; row<bs; row++) sums[row] = 0.0; 813 v = amat->a + bs2*amat->i[j]; 814 nz = amat->i[j+1]-amat->i[j]; 815 for (i=0; i<nz; i++) { 816 for (col=0; col<bs; col++){ 817 for (row=0; row<bs; row++){ 818 sums[row] += PetscAbsScalar(*v); v++; 819 } 820 } 821 } 822 v = bmat->a + bs2*bmat->i[j]; 823 nz = bmat->i[j+1]-bmat->i[j]; 824 for (i=0; i<nz; i++) { 825 for (col=0; col<bs; col++){ 826 for (row=0; row<bs; row++){ 827 sums[row] += PetscAbsScalar(*v); v++; 828 } 829 } 830 } 831 for (row=0; row<bs; row++){ 832 if (sums[row] > sum) sum = sums[row]; 833 } 834 } 835 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 836 ierr = PetscFree(sums);CHKERRQ(ierr); 837 } else { 838 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 839 } 840 } 841 PetscFunctionReturn(0); 842 } 843 844 /* 845 Creates the hash table, and sets the table 846 This table is created only once. 847 If new entried need to be added to the matrix 848 then the hash table has to be destroyed and 849 recreated. 850 */ 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 853 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 854 { 855 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 856 Mat A = baij->A,B=baij->B; 857 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 858 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 859 PetscErrorCode ierr; 860 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 861 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 862 PetscInt *HT,key; 863 MatScalar **HD; 864 PetscReal tmp; 865 #if defined(PETSC_USE_INFO) 866 PetscInt ct=0,max=0; 867 #endif 868 869 PetscFunctionBegin; 870 baij->ht_size=(PetscInt)(factor*nz); 871 size = baij->ht_size; 872 873 if (baij->ht) { 874 PetscFunctionReturn(0); 875 } 876 877 /* Allocate Memory for Hash Table */ 878 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 879 baij->ht = (PetscInt*)(baij->hd + size); 880 HD = baij->hd; 881 HT = baij->ht; 882 883 884 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 885 886 887 /* Loop Over A */ 888 for (i=0; i<a->mbs; i++) { 889 for (j=ai[i]; j<ai[i+1]; j++) { 890 row = i+rstart; 891 col = aj[j]+cstart; 892 893 key = row*Nbs + col + 1; 894 h1 = HASH(size,key,tmp); 895 for (k=0; k<size; k++){ 896 if (!HT[(h1+k)%size]) { 897 HT[(h1+k)%size] = key; 898 HD[(h1+k)%size] = a->a + j*bs2; 899 break; 900 #if defined(PETSC_USE_INFO) 901 } else { 902 ct++; 903 #endif 904 } 905 } 906 #if defined(PETSC_USE_INFO) 907 if (k> max) max = k; 908 #endif 909 } 910 } 911 /* Loop Over B */ 912 for (i=0; i<b->mbs; i++) { 913 for (j=bi[i]; j<bi[i+1]; j++) { 914 row = i+rstart; 915 col = garray[bj[j]]; 916 key = row*Nbs + col + 1; 917 h1 = HASH(size,key,tmp); 918 for (k=0; k<size; k++){ 919 if (!HT[(h1+k)%size]) { 920 HT[(h1+k)%size] = key; 921 HD[(h1+k)%size] = b->a + j*bs2; 922 break; 923 #if defined(PETSC_USE_INFO) 924 } else { 925 ct++; 926 #endif 927 } 928 } 929 #if defined(PETSC_USE_INFO) 930 if (k> max) max = k; 931 #endif 932 } 933 } 934 935 /* Print Summary */ 936 #if defined(PETSC_USE_INFO) 937 for (i=0,j=0; i<size; 
i++) { 938 if (HT[i]) {j++;} 939 } 940 ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 941 #endif 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 947 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 948 { 949 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 950 PetscErrorCode ierr; 951 PetscInt nstash,reallocs; 952 InsertMode addv; 953 954 PetscFunctionBegin; 955 if (baij->donotstash) { 956 PetscFunctionReturn(0); 957 } 958 959 /* make sure all processors are either in INSERTMODE or ADDMODE */ 960 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 961 if (addv == (ADD_VALUES|INSERT_VALUES)) { 962 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 963 } 964 mat->insertmode = addv; /* in case this processor had no cache */ 965 966 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 967 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 968 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 969 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 970 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 971 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 977 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 978 { 979 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 980 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 981 PetscErrorCode ierr; 982 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 983 PetscInt *row,*col,other_disassembled; 984 PetscTruth r1,r2,r3; 985 MatScalar *val; 986 InsertMode addv = mat->insertmode; 987 PetscMPIInt n; 988 989 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 990 PetscFunctionBegin; 991 if (!baij->donotstash) { 992 while (1) { 993 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 994 if (!flg) break; 995 996 for (i=0; i<n;) { 997 /* Now identify the consecutive vals belonging to the same row */ 998 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 999 if (j < n) ncols = j-i; 1000 else ncols = n-i; 1001 /* Now assemble all these values with a single function call */ 1002 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1003 i = j; 1004 } 1005 } 1006 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1007 /* Now process the block-stash. 
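       Each message pulled off the block-stash below carries block rows, block columns and
       the corresponding bs*bs blocks of values contributed by other processes.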
Since the values are stashed column-oriented, 1008 set the roworiented flag to column oriented, and after MatSetValues() 1009 restore the original flags */ 1010 r1 = baij->roworiented; 1011 r2 = a->roworiented; 1012 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 1013 baij->roworiented = PETSC_FALSE; 1014 a->roworiented = PETSC_FALSE; 1015 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 1016 while (1) { 1017 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1018 if (!flg) break; 1019 1020 for (i=0; i<n;) { 1021 /* Now identify the consecutive vals belonging to the same row */ 1022 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1023 if (j < n) ncols = j-i; 1024 else ncols = n-i; 1025 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1026 i = j; 1027 } 1028 } 1029 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1030 baij->roworiented = r1; 1031 a->roworiented = r2; 1032 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 1033 } 1034 1035 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1036 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1037 1038 /* determine if any processor has disassembled, if so we must 1039 also disassemble ourselfs, in order that we may reassemble. */ 1040 /* 1041 if nonzero structure of submatrix B cannot change then we know that 1042 no processor disassembled thus we can skip this stuff 1043 */ 1044 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1045 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1046 if (mat->was_assembled && !other_disassembled) { 1047 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1048 } 1049 } 1050 1051 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1052 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1053 } 1054 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 1055 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1056 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1057 1058 #if defined(PETSC_USE_INFO) 1059 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1060 ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 1061 baij->ht_total_ct = 0; 1062 baij->ht_insert_ct = 0; 1063 } 1064 #endif 1065 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1066 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1067 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1068 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1069 } 1070 1071 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1072 baij->rowvalues = 0; 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1078 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1079 { 1080 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1081 PetscErrorCode ierr; 1082 PetscMPIInt size = baij->size,rank = baij->rank; 1083 PetscInt bs = mat->rmap.bs; 1084 PetscTruth iascii,isdraw; 1085 PetscViewer sviewer; 1086 PetscViewerFormat format; 1087 1088 PetscFunctionBegin; 1089 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1090 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1091 if (iascii) { 1092 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1093 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1094 MatInfo info; 1095 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1096 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1097 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1098 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1099 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1100 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1101 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1102 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1103 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1104 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1105 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1106 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1109 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1112 PetscFunctionReturn(0); 1113 } 1114 } 1115 1116 if (isdraw) { 1117 PetscDraw draw; 1118 PetscTruth isnull; 1119 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1120 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1121 } 1122 1123 if (size == 1) { 1124 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1125 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1126 } else { 1127 /* assemble the entire matrix onto first processor. */ 1128 Mat A; 1129 Mat_SeqBAIJ *Aloc; 1130 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1131 MatScalar *a; 1132 1133 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1134 /* Perhaps this should be the type of mat? 
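       (e.g. MatSetType(A,mat->type_name) rather than the hard-wired MATMPIBAIJ used below,
       in the same way MatTranspose_MPIBAIJ() uses A->type_name for its temporary matrix)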
*/ 1135 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 1136 if (!rank) { 1137 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1138 } else { 1139 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1140 } 1141 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1142 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1143 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1144 1145 /* copy over the A part */ 1146 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1147 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1149 1150 for (i=0; i<mbs; i++) { 1151 rvals[0] = bs*(baij->rstartbs + i); 1152 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1153 for (j=ai[i]; j<ai[i+1]; j++) { 1154 col = (baij->cstartbs+aj[j])*bs; 1155 for (k=0; k<bs; k++) { 1156 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1157 col++; a += bs; 1158 } 1159 } 1160 } 1161 /* copy over the B part */ 1162 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1163 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1164 for (i=0; i<mbs; i++) { 1165 rvals[0] = bs*(baij->rstartbs + i); 1166 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1167 for (j=ai[i]; j<ai[i+1]; j++) { 1168 col = baij->garray[aj[j]]*bs; 1169 for (k=0; k<bs; k++) { 1170 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1171 col++; a += bs; 1172 } 1173 } 1174 } 1175 ierr = PetscFree(rvals);CHKERRQ(ierr); 1176 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1177 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1178 /* 1179 Everyone has to call to draw the matrix since the graphics waits are 1180 synchronized across all processors that share the PetscDraw object 1181 */ 1182 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1183 if (!rank) { 1184 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1185 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1186 } 1187 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1188 ierr = MatDestroy(A);CHKERRQ(ierr); 1189 } 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatView_MPIBAIJ" 1195 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1196 { 1197 PetscErrorCode ierr; 1198 PetscTruth iascii,isdraw,issocket,isbinary; 1199 1200 PetscFunctionBegin; 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1202 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1203 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1204 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1205 if (iascii || isdraw || issocket || isbinary) { 1206 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1207 } else { 1208 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1209 } 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1215 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1216 { 1217 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 #if defined(PETSC_USE_LOG) 1222 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 1223 #endif 1224 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1225 ierr = 
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

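  /*
     Outline of the transpose product y = A^T x formed below, using the usual
     diagonal (A) / off-diagonal (B) splitting of this format:

        lvec = B^T x                       (contributions owed to other processes)
        y    = A^T x                       (diagonal block, purely local)
        y   += reverse scatter-add of lvec

     The local product overwrites y, so the scatter-add must complete only after it.
     With a merged scatter (see VecScatterGetMerged() below) the Begin() already
     deposits the values, so the local product is done first; otherwise the Begin()
     is posted first so the communication overlaps the local product.
  */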
PetscFunctionBegin; 1304 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1305 /* do nondiagonal part */ 1306 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1307 if (!merged) { 1308 /* send it on its way */ 1309 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1310 /* do local part */ 1311 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1312 /* receive remote parts: note this assumes the values are not actually */ 1313 /* inserted in yy until the next line */ 1314 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1315 } else { 1316 /* do local part */ 1317 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1318 /* send it on its way */ 1319 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1320 /* values actually were received in the Begin() but we need to call this nop */ 1321 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1328 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1329 { 1330 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 /* do nondiagonal part */ 1335 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1336 /* send it on its way */ 1337 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1338 /* do local part */ 1339 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1340 /* receive remote parts: note this assumes the values are not actually */ 1341 /* inserted in yy until the next line, which is true for my implementation*/ 1342 /* but is not perhaps always true. 
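       (had the VecScatterBegin() above already added the values, the local
       product would have overwritten them)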
*/ 1343 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1344 PetscFunctionReturn(0); 1345 } 1346 1347 /* 1348 This only works correctly for square matrices where the subblock A->A is the 1349 diagonal block 1350 */ 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1353 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1354 { 1355 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1356 PetscErrorCode ierr; 1357 1358 PetscFunctionBegin; 1359 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1360 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatScale_MPIBAIJ" 1366 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1367 { 1368 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1369 PetscErrorCode ierr; 1370 1371 PetscFunctionBegin; 1372 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1373 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1379 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1380 { 1381 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1382 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1383 PetscErrorCode ierr; 1384 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1385 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1386 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1387 1388 PetscFunctionBegin; 1389 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1390 mat->getrowactive = PETSC_TRUE; 1391 1392 if (!mat->rowvalues && (idx || v)) { 1393 /* 1394 allocate enough space to hold information from the longest row. 
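       MatGetRow() on this format returns the merged row of the diagonal (A) and
       off-diagonal (B) parts, so the bound is the maximum over block rows of the
       combined number of blocks, times bs2 entries per block.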
1395 */ 1396 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1397 PetscInt max = 1,mbs = mat->mbs,tmp; 1398 for (i=0; i<mbs; i++) { 1399 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1400 if (max < tmp) { max = tmp; } 1401 } 1402 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1403 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1404 } 1405 1406 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1407 lrow = row - brstart; 1408 1409 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1410 if (!v) {pvA = 0; pvB = 0;} 1411 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1412 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1413 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1414 nztot = nzA + nzB; 1415 1416 cmap = mat->garray; 1417 if (v || idx) { 1418 if (nztot) { 1419 /* Sort by increasing column numbers, assuming A and B already sorted */ 1420 PetscInt imark = -1; 1421 if (v) { 1422 *v = v_p = mat->rowvalues; 1423 for (i=0; i<nzB; i++) { 1424 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1425 else break; 1426 } 1427 imark = i; 1428 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1429 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1430 } 1431 if (idx) { 1432 *idx = idx_p = mat->rowindices; 1433 if (imark > -1) { 1434 for (i=0; i<imark; i++) { 1435 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1436 } 1437 } else { 1438 for (i=0; i<nzB; i++) { 1439 if (cmap[cworkB[i]/bs] < cstart) 1440 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1441 else break; 1442 } 1443 imark = i; 1444 } 1445 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1446 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1447 } 1448 } else { 1449 if (idx) *idx = 0; 1450 if (v) *v = 0; 1451 } 1452 } 1453 *nz = nztot; 1454 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1455 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1461 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1462 { 1463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1464 1465 PetscFunctionBegin; 1466 if (!baij->getrowactive) { 1467 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1468 } 1469 baij->getrowactive = PETSC_FALSE; 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1475 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1476 { 1477 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1478 PetscErrorCode ierr; 1479 1480 PetscFunctionBegin; 1481 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1482 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1488 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1489 { 1490 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1491 Mat A = a->A,B = a->B; 1492 PetscErrorCode ierr; 1493 PetscReal isend[5],irecv[5]; 1494 1495 PetscFunctionBegin; 1496 info->block_size = (PetscReal)matin->rmap.bs; 1497 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1498 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1499 isend[3] = info->memory; isend[4] = info->mallocs; 1500 ierr = 
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1501 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1502 isend[3] += info->memory; isend[4] += info->mallocs; 1503 if (flag == MAT_LOCAL) { 1504 info->nz_used = isend[0]; 1505 info->nz_allocated = isend[1]; 1506 info->nz_unneeded = isend[2]; 1507 info->memory = isend[3]; 1508 info->mallocs = isend[4]; 1509 } else if (flag == MAT_GLOBAL_MAX) { 1510 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1511 info->nz_used = irecv[0]; 1512 info->nz_allocated = irecv[1]; 1513 info->nz_unneeded = irecv[2]; 1514 info->memory = irecv[3]; 1515 info->mallocs = irecv[4]; 1516 } else if (flag == MAT_GLOBAL_SUM) { 1517 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1518 info->nz_used = irecv[0]; 1519 info->nz_allocated = irecv[1]; 1520 info->nz_unneeded = irecv[2]; 1521 info->memory = irecv[3]; 1522 info->mallocs = irecv[4]; 1523 } else { 1524 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1525 } 1526 info->rows_global = (PetscReal)A->rmap.N; 1527 info->columns_global = (PetscReal)A->cmap.N; 1528 info->rows_local = (PetscReal)A->rmap.N; 1529 info->columns_local = (PetscReal)A->cmap.N; 1530 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1531 info->fill_ratio_needed = 0; 1532 info->factor_mallocs = 0; 1533 PetscFunctionReturn(0); 1534 } 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1538 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1539 { 1540 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 switch (op) { 1545 case MAT_NO_NEW_NONZERO_LOCATIONS: 1546 case MAT_YES_NEW_NONZERO_LOCATIONS: 1547 case MAT_COLUMNS_UNSORTED: 1548 case MAT_COLUMNS_SORTED: 1549 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1550 case MAT_KEEP_ZEROED_ROWS: 1551 case MAT_NEW_NONZERO_LOCATION_ERR: 1552 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1553 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1554 break; 1555 case MAT_ROW_ORIENTED: 1556 a->roworiented = PETSC_TRUE; 1557 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1558 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1559 break; 1560 case MAT_ROWS_SORTED: 1561 case MAT_ROWS_UNSORTED: 1562 case MAT_YES_NEW_DIAGONALS: 1563 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1564 break; 1565 case MAT_COLUMN_ORIENTED: 1566 a->roworiented = PETSC_FALSE; 1567 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1568 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1569 break; 1570 case MAT_IGNORE_OFF_PROC_ENTRIES: 1571 a->donotstash = PETSC_TRUE; 1572 break; 1573 case MAT_NO_NEW_DIAGONALS: 1574 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1575 case MAT_USE_HASH_TABLE: 1576 a->ht_flag = PETSC_TRUE; 1577 break; 1578 case MAT_SYMMETRIC: 1579 case MAT_STRUCTURALLY_SYMMETRIC: 1580 case MAT_HERMITIAN: 1581 case MAT_SYMMETRY_ETERNAL: 1582 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1583 break; 1584 case MAT_NOT_SYMMETRIC: 1585 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1586 case MAT_NOT_HERMITIAN: 1587 case MAT_NOT_SYMMETRY_ETERNAL: 1588 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1589 break; 1590 default: 1591 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1592 } 1593 PetscFunctionReturn(0); 1594 } 1595 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1598 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1599 { 1600 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1601 Mat_SeqBAIJ 
*Aloc; 1602 Mat B; 1603 PetscErrorCode ierr; 1604 PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1605 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1606 MatScalar *a; 1607 1608 PetscFunctionBegin; 1609 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1610 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1611 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1612 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1613 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1614 1615 /* copy over the A part */ 1616 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1617 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1618 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1619 1620 for (i=0; i<mbs; i++) { 1621 rvals[0] = bs*(baij->rstartbs + i); 1622 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1623 for (j=ai[i]; j<ai[i+1]; j++) { 1624 col = (baij->cstartbs+aj[j])*bs; 1625 for (k=0; k<bs; k++) { 1626 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1627 col++; a += bs; 1628 } 1629 } 1630 } 1631 /* copy over the B part */ 1632 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1633 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1634 for (i=0; i<mbs; i++) { 1635 rvals[0] = bs*(baij->rstartbs + i); 1636 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1637 for (j=ai[i]; j<ai[i+1]; j++) { 1638 col = baij->garray[aj[j]]*bs; 1639 for (k=0; k<bs; k++) { 1640 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1641 col++; a += bs; 1642 } 1643 } 1644 } 1645 ierr = PetscFree(rvals);CHKERRQ(ierr); 1646 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1647 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1648 1649 if (matout) { 1650 *matout = B; 1651 } else { 1652 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1659 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1660 { 1661 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1662 Mat a = baij->A,b = baij->B; 1663 PetscErrorCode ierr; 1664 PetscInt s1,s2,s3; 1665 1666 PetscFunctionBegin; 1667 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1668 if (rr) { 1669 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1670 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1671 /* Overlap communication with computation. 
*/ 1672 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1673 } 1674 if (ll) { 1675 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1676 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1677 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1678 } 1679 /* scale the diagonal block */ 1680 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1681 1682 if (rr) { 1683 /* Do a scatter end and then right scale the off-diagonal block */ 1684 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1685 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1686 } 1687 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1693 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1694 { 1695 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1696 PetscErrorCode ierr; 1697 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1698 PetscInt i,*owners = A->rmap.range; 1699 PetscInt *nprocs,j,idx,nsends,row; 1700 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1701 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1702 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1703 MPI_Comm comm = A->comm; 1704 MPI_Request *send_waits,*recv_waits; 1705 MPI_Status recv_status,*send_status; 1706 #if defined(PETSC_DEBUG) 1707 PetscTruth found = PETSC_FALSE; 1708 #endif 1709 1710 PetscFunctionBegin; 1711 /* first count number of contributors to each processor */ 1712 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1713 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1714 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1715 j = 0; 1716 for (i=0; i<N; i++) { 1717 if (lastidx > (idx = rows[i])) j = 0; 1718 lastidx = idx; 1719 for (; j<size; j++) { 1720 if (idx >= owners[j] && idx < owners[j+1]) { 1721 nprocs[2*j]++; 1722 nprocs[2*j+1] = 1; 1723 owner[i] = j; 1724 #if defined(PETSC_DEBUG) 1725 found = PETSC_TRUE; 1726 #endif 1727 break; 1728 } 1729 } 1730 #if defined(PETSC_DEBUG) 1731 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1732 found = PETSC_FALSE; 1733 #endif 1734 } 1735 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1736 1737 /* inform other processors of number of messages and max length*/ 1738 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1739 1740 /* post receives: */ 1741 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1742 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1743 for (i=0; i<nrecvs; i++) { 1744 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1745 } 1746 1747 /* do sends: 1748 1) starts[i] gives the starting index in svalues for stuff going to 1749 the ith processor 1750 */ 1751 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1752 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1753 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1754 starts[0] = 0; 1755 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1756 for (i=0; i<N; i++) { 1757 svalues[starts[owner[i]]++] = rows[i]; 1758 } 1759 1760 starts[0] = 0; 1761 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1762 count = 0; 1763 for (i=0; i<size; i++) { 1764 if 
(nprocs[2*i+1]) { 1765 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1766 } 1767 } 1768 ierr = PetscFree(starts);CHKERRQ(ierr); 1769 1770 base = owners[rank]; 1771 1772 /* wait on receives */ 1773 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1774 source = lens + nrecvs; 1775 count = nrecvs; slen = 0; 1776 while (count) { 1777 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1778 /* unpack receives into our local space */ 1779 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1780 source[imdex] = recv_status.MPI_SOURCE; 1781 lens[imdex] = n; 1782 slen += n; 1783 count--; 1784 } 1785 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1786 1787 /* move the data into the send scatter */ 1788 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1789 count = 0; 1790 for (i=0; i<nrecvs; i++) { 1791 values = rvalues + i*nmax; 1792 for (j=0; j<lens[i]; j++) { 1793 lrows[count++] = values[j] - base; 1794 } 1795 } 1796 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1797 ierr = PetscFree(lens);CHKERRQ(ierr); 1798 ierr = PetscFree(owner);CHKERRQ(ierr); 1799 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1800 1801 /* actually zap the local rows */ 1802 /* 1803 Zero the required rows. If the "diagonal block" of the matrix 1804 is square and the user wishes to set the diagonal we use separate 1805 code so that MatSetValues() is not called for each diagonal allocating 1806 new memory, thus calling lots of mallocs and slowing things down. 1807 1808 Contributed by: Matthew Knepley 1809 */ 1810 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1811 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1812 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1813 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1814 } else if (diag != 0.0) { 1815 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1816 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1817 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1818 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1819 } 1820 for (i=0; i<slen; i++) { 1821 row = lrows[i] + rstart_bs; 1822 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1823 } 1824 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1825 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1826 } else { 1827 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1828 } 1829 1830 ierr = PetscFree(lrows);CHKERRQ(ierr); 1831 1832 /* wait on sends */ 1833 if (nsends) { 1834 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1835 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1836 ierr = PetscFree(send_status);CHKERRQ(ierr); 1837 } 1838 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1839 ierr = PetscFree(svalues);CHKERRQ(ierr); 1840 1841 PetscFunctionReturn(0); 1842 } 1843 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1846 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1847 { 1848 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1849 PetscErrorCode ierr; 1850 1851 PetscFunctionBegin; 1852 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1853 PetscFunctionReturn(0); 1854 } 1855 1856 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatEqual_MPIBAIJ" 1860 PetscErrorCode 
MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1861 { 1862 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1863 Mat a,b,c,d; 1864 PetscTruth flg; 1865 PetscErrorCode ierr; 1866 1867 PetscFunctionBegin; 1868 a = matA->A; b = matA->B; 1869 c = matB->A; d = matB->B; 1870 1871 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1872 if (flg) { 1873 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1874 } 1875 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1876 PetscFunctionReturn(0); 1877 } 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "MatCopy_MPIBAIJ" 1881 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1882 { 1883 PetscErrorCode ierr; 1884 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1885 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1886 1887 PetscFunctionBegin; 1888 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1889 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1890 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1891 } else { 1892 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1893 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1900 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1901 { 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #include "petscblaslapack.h" 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1912 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1913 { 1914 PetscErrorCode ierr; 1915 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1916 PetscBLASInt bnz,one=1; 1917 Mat_SeqBAIJ *x,*y; 1918 1919 PetscFunctionBegin; 1920 if (str == SAME_NONZERO_PATTERN) { 1921 PetscScalar alpha = a; 1922 x = (Mat_SeqBAIJ *)xx->A->data; 1923 y = (Mat_SeqBAIJ *)yy->A->data; 1924 bnz = (PetscBLASInt)x->nz; 1925 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1926 x = (Mat_SeqBAIJ *)xx->B->data; 1927 y = (Mat_SeqBAIJ *)yy->B->data; 1928 bnz = (PetscBLASInt)x->nz; 1929 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1930 } else { 1931 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1932 } 1933 PetscFunctionReturn(0); 1934 } 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1938 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1939 { 1940 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1941 PetscErrorCode ierr; 1942 1943 PetscFunctionBegin; 1944 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1945 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1951 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1952 { 1953 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1958 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 /* -------------------------------------------------------------------*/ 1963 static struct _MatOps MatOps_Values = { 1964 MatSetValues_MPIBAIJ, 1965 MatGetRow_MPIBAIJ, 1966 MatRestoreRow_MPIBAIJ, 1967 MatMult_MPIBAIJ, 1968 /* 4*/ MatMultAdd_MPIBAIJ, 1969 MatMultTranspose_MPIBAIJ, 1970 MatMultTransposeAdd_MPIBAIJ, 1971 0, 1972 0, 1973 0, 1974 /*10*/ 0, 1975 0, 1976 0, 1977 0, 1978 MatTranspose_MPIBAIJ, 
1979 /*15*/ MatGetInfo_MPIBAIJ, 1980 MatEqual_MPIBAIJ, 1981 MatGetDiagonal_MPIBAIJ, 1982 MatDiagonalScale_MPIBAIJ, 1983 MatNorm_MPIBAIJ, 1984 /*20*/ MatAssemblyBegin_MPIBAIJ, 1985 MatAssemblyEnd_MPIBAIJ, 1986 0, 1987 MatSetOption_MPIBAIJ, 1988 MatZeroEntries_MPIBAIJ, 1989 /*25*/ MatZeroRows_MPIBAIJ, 1990 0, 1991 0, 1992 0, 1993 0, 1994 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1995 0, 1996 0, 1997 0, 1998 0, 1999 /*35*/ MatDuplicate_MPIBAIJ, 2000 0, 2001 0, 2002 0, 2003 0, 2004 /*40*/ MatAXPY_MPIBAIJ, 2005 MatGetSubMatrices_MPIBAIJ, 2006 MatIncreaseOverlap_MPIBAIJ, 2007 MatGetValues_MPIBAIJ, 2008 MatCopy_MPIBAIJ, 2009 /*45*/ 0, 2010 MatScale_MPIBAIJ, 2011 0, 2012 0, 2013 0, 2014 /*50*/ 0, 2015 0, 2016 0, 2017 0, 2018 0, 2019 /*55*/ 0, 2020 0, 2021 MatSetUnfactored_MPIBAIJ, 2022 0, 2023 MatSetValuesBlocked_MPIBAIJ, 2024 /*60*/ 0, 2025 MatDestroy_MPIBAIJ, 2026 MatView_MPIBAIJ, 2027 0, 2028 0, 2029 /*65*/ 0, 2030 0, 2031 0, 2032 0, 2033 0, 2034 /*70*/ MatGetRowMaxAbs_MPIBAIJ, 2035 0, 2036 0, 2037 0, 2038 0, 2039 /*75*/ 0, 2040 0, 2041 0, 2042 0, 2043 0, 2044 /*80*/ 0, 2045 0, 2046 0, 2047 0, 2048 MatLoad_MPIBAIJ, 2049 /*85*/ 0, 2050 0, 2051 0, 2052 0, 2053 0, 2054 /*90*/ 0, 2055 0, 2056 0, 2057 0, 2058 0, 2059 /*95*/ 0, 2060 0, 2061 0, 2062 0, 2063 0, 2064 /*100*/0, 2065 0, 2066 0, 2067 0, 2068 0, 2069 /*105*/0, 2070 MatRealPart_MPIBAIJ, 2071 MatImaginaryPart_MPIBAIJ}; 2072 2073 2074 EXTERN_C_BEGIN 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2077 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2078 { 2079 PetscFunctionBegin; 2080 *a = ((Mat_MPIBAIJ *)A->data)->A; 2081 *iscopy = PETSC_FALSE; 2082 PetscFunctionReturn(0); 2083 } 2084 EXTERN_C_END 2085 2086 EXTERN_C_BEGIN 2087 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2088 EXTERN_C_END 2089 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2092 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2093 { 2094 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2095 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2096 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2097 const PetscInt *JJ; 2098 PetscScalar *values; 2099 PetscErrorCode ierr; 2100 2101 PetscFunctionBegin; 2102 #if defined(PETSC_OPT_g) 2103 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2104 #endif 2105 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2106 o_nnz = d_nnz + m; 2107 2108 for (i=0; i<m; i++) { 2109 nnz = Ii[i+1]- Ii[i]; 2110 JJ = J + Ii[i]; 2111 nnz_max = PetscMax(nnz_max,nnz); 2112 #if defined(PETSC_OPT_g) 2113 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2114 #endif 2115 for (j=0; j<nnz; j++) { 2116 if (*JJ >= cstart) break; 2117 JJ++; 2118 } 2119 d = 0; 2120 for (; j<nnz; j++) { 2121 if (*JJ++ >= cend) break; 2122 d++; 2123 } 2124 d_nnz[i] = d; 2125 o_nnz[i] = nnz - d; 2126 } 2127 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2128 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2129 2130 if (v) values = (PetscScalar*)v; 2131 else { 2132 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2133 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2134 } 2135 2136 ierr = 
MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2137 for (i=0; i<m; i++) { 2138 ii = i + rstart; 2139 nnz = Ii[i+1]- Ii[i]; 2140 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2141 } 2142 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2144 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2145 2146 if (!v) { 2147 ierr = PetscFree(values);CHKERRQ(ierr); 2148 } 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2154 /*@C 2155 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2156 (the default parallel PETSc format). 2157 2158 Collective on MPI_Comm 2159 2160 Input Parameters: 2161 + A - the matrix 2162 . i - the indices into j for the start of each local row (starts with zero) 2163 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2164 - v - optional values in the matrix 2165 2166 Level: developer 2167 2168 .keywords: matrix, aij, compressed row, sparse, parallel 2169 2170 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2171 @*/ 2172 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2173 { 2174 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2175 2176 PetscFunctionBegin; 2177 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2178 if (f) { 2179 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2180 } 2181 PetscFunctionReturn(0); 2182 } 2183 2184 EXTERN_C_BEGIN 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2187 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2188 { 2189 Mat_MPIBAIJ *b; 2190 PetscErrorCode ierr; 2191 PetscInt i; 2192 2193 PetscFunctionBegin; 2194 B->preallocated = PETSC_TRUE; 2195 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2196 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2197 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2198 2199 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2200 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2201 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2202 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2203 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2204 2205 B->rmap.bs = bs; 2206 B->cmap.bs = bs; 2207 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2208 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2209 2210 if (d_nnz) { 2211 for (i=0; i<B->rmap.n/bs; i++) { 2212 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2213 } 2214 } 2215 if (o_nnz) { 2216 for (i=0; i<B->rmap.n/bs; i++) { 2217 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2218 } 2219 } 2220 2221 b = (Mat_MPIBAIJ*)B->data; 2222 b->bs2 = bs*bs; 
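/* derive the block-row/block-column counts and block ownership ranges from the point-wise layout set up above */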
2223 b->mbs = B->rmap.n/bs; 2224 b->nbs = B->cmap.n/bs; 2225 b->Mbs = B->rmap.N/bs; 2226 b->Nbs = B->cmap.N/bs; 2227 2228 for (i=0; i<=b->size; i++) { 2229 b->rangebs[i] = B->rmap.range[i]/bs; 2230 } 2231 b->rstartbs = B->rmap.rstart/bs; 2232 b->rendbs = B->rmap.rend/bs; 2233 b->cstartbs = B->cmap.rstart/bs; 2234 b->cendbs = B->cmap.rend/bs; 2235 2236 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2237 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2238 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2239 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2240 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2241 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2242 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2243 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2244 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2245 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2246 2247 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2248 2249 PetscFunctionReturn(0); 2250 } 2251 EXTERN_C_END 2252 2253 EXTERN_C_BEGIN 2254 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2255 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2256 EXTERN_C_END 2257 2258 /*MC 2259 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2260 2261 Options Database Keys: 2262 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2263 . -mat_block_size <bs> - set the blocksize used to store the matrix 2264 - -mat_use_hash_table <fact> 2265 2266 Level: beginner 2267 2268 .seealso: MatCreateMPIBAIJ 2269 M*/ 2270 2271 EXTERN_C_BEGIN 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatCreate_MPIBAIJ" 2274 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2275 { 2276 Mat_MPIBAIJ *b; 2277 PetscErrorCode ierr; 2278 PetscTruth flg; 2279 2280 PetscFunctionBegin; 2281 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2282 B->data = (void*)b; 2283 2284 2285 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2286 B->mapping = 0; 2287 B->factor = 0; 2288 B->assembled = PETSC_FALSE; 2289 2290 B->insertmode = NOT_SET_VALUES; 2291 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2292 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2293 2294 /* build local table of row and column ownerships */ 2295 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2296 2297 /* build cache for off array entries formed */ 2298 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2299 b->donotstash = PETSC_FALSE; 2300 b->colmap = PETSC_NULL; 2301 b->garray = PETSC_NULL; 2302 b->roworiented = PETSC_TRUE; 2303 2304 #if defined(PETSC_USE_MAT_SINGLE) 2305 /* stuff for MatSetValues_XXX in single precision */ 2306 b->setvalueslen = 0; 2307 b->setvaluescopy = PETSC_NULL; 2308 #endif 2309 2310 /* stuff used in block assembly */ 2311 b->barray = 0; 2312 2313 /* stuff used for matrix vector multiply */ 2314 b->lvec = 0; 2315 b->Mvctx = 0; 2316 2317 /* stuff for MatGetRow() */ 2318 b->rowindices = 0; 2319 b->rowvalues = 0; 2320 b->getrowactive = PETSC_FALSE; 2321 2322 /* hash table stuff */ 2323 b->ht = 0; 2324 b->hd = 0; 2325 b->ht_size = 0; 2326 b->ht_flag = PETSC_FALSE; 2327 b->ht_fact = 0; 2328 b->ht_total_ct = 0; 2329 b->ht_insert_ct = 0; 2330 2331 ierr = 
PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2332 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2333 if (flg) { 2334 PetscReal fact = 1.39; 2335 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2336 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2337 if (fact <= 1.0) fact = 1.39; 2338 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2339 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2340 } 2341 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2342 2343 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2344 "MatStoreValues_MPIBAIJ", 2345 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2346 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2347 "MatRetrieveValues_MPIBAIJ", 2348 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2349 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2350 "MatGetDiagonalBlock_MPIBAIJ", 2351 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2352 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2353 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2354 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2355 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2356 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2357 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2358 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2359 "MatDiagonalScaleLocal_MPIBAIJ", 2360 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2361 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2362 "MatSetHashTableFactor_MPIBAIJ", 2363 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2364 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 EXTERN_C_END 2368 2369 /*MC 2370 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2371 2372 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2373 and MATMPIBAIJ otherwise. 2374 2375 Options Database Keys: 2376 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2377 2378 Level: beginner 2379 2380 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2381 M*/ 2382 2383 EXTERN_C_BEGIN 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatCreate_BAIJ" 2386 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2387 { 2388 PetscErrorCode ierr; 2389 PetscMPIInt size; 2390 2391 PetscFunctionBegin; 2392 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2393 if (size == 1) { 2394 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2395 } else { 2396 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 EXTERN_C_END 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2404 /*@C 2405 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2406 (block compressed row). For good matrix assembly performance 2407 the user should preallocate the matrix storage by setting the parameters 2408 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately,
2409 performance can be increased by more than a factor of 50.
2410
2411 Collective on Mat
2412
2413 Input Parameters:
2414 + A - the matrix
2415 . bs - size of block
2416 . d_nz - number of block nonzeros per block row in diagonal portion of local
2417 submatrix (same for all local rows)
2418 . d_nnz - array containing the number of block nonzeros in the various block rows
2419 of the diagonal portion of the local submatrix (possibly different for each block
2420 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2421 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2422 submatrix (same for all local rows).
2423 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2424 off-diagonal portion of the local submatrix (possibly different for
2425 each block row) or PETSC_NULL.
2426
2427 If the *_nnz parameter is given then the *_nz parameter is ignored.
2428
2429 Options Database Keys:
2430 + -mat_block_size - size of the blocks to use
2431 - -mat_use_hash_table <fact> - use a hash table during assembly to save memory
2432
2433 Notes:
2434 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2435 then it must be used on all processors that share the object for that argument.
2436
2437 Storage Information:
2438 For a square global matrix we define each processor's diagonal portion
2439 to be its local rows and the corresponding columns (a square submatrix);
2440 each processor's off-diagonal portion encompasses the remainder of the
2441 local matrix (a rectangular submatrix).
2442
2443 The user can specify preallocated storage for the diagonal part of
2444 the local submatrix with either d_nz or d_nnz (not both). Set
2445 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2446 memory allocation. Likewise, specify preallocated storage for the
2447 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2448
2449 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2450 the figure below we depict these three local rows and all columns (0-11).
2451
2452 .vb
2453 0 1 2 3 4 5 6 7 8 9 10 11
2454 -------------------
2455 row 3 | o o o d d d o o o o o o
2456 row 4 | o o o d d d o o o o o o
2457 row 5 | o o o d d d o o o o o o
2458 -------------------
2459 .ve
2460
2461 Thus, any entries in the d locations are stored in the d (diagonal)
2462 submatrix, and any entries in the o locations are stored in the
2463 o (off-diagonal) submatrix. Note that the d and the o submatrices are
2464 stored simply in the MATSEQBAIJ format for compressed row storage.
2465
2466 Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2467 and o_nz should indicate the number of block nonzeros per row in the o matrix.
2468 In general, for PDE problems in which most nonzeros are near the diagonal,
2469 one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2470 or you will get TERRIBLE performance; see the users' manual chapter on
2471 matrices.
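   Example (a minimal sketch; the sizes and per-block-row counts are illustrative only,
   and comm and ierr are assumed to be declared). With bs = 2 and m = 10 this process
   owns 5 block rows, so d_nnz and o_nnz each have 5 entries giving the number of block
   nonzeros per block row in the diagonal and off-diagonal parts:

.vb
     Mat      A;
     PetscInt bs = 2, m = 10;
     PetscInt d_nnz[5] = {3,3,3,3,3};
     PetscInt o_nnz[5] = {1,1,1,1,1};

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve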
2472
2473 Level: intermediate
2474
2475 .keywords: matrix, block, aij, compressed row, sparse, parallel
2476
2477 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2478 @*/
2479 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2480 {
2481 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2482
2483 PetscFunctionBegin;
2484 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2485 if (f) {
2486 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2487 }
2488 PetscFunctionReturn(0);
2489 }
2490
2491 #undef __FUNCT__
2492 #define __FUNCT__ "MatCreateMPIBAIJ"
2493 /*@C
2494 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2495 (block compressed row). For good matrix assembly performance
2496 the user should preallocate the matrix storage by setting the parameters
2497 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2498 performance can be increased by more than a factor of 50.
2499
2500 Collective on MPI_Comm
2501
2502 Input Parameters:
2503 + comm - MPI communicator
2504 . bs - size of block
2505 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2506 This value should be the same as the local size used in creating the
2507 y vector for the matrix-vector product y = Ax.
2508 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2509 This value should be the same as the local size used in creating the
2510 x vector for the matrix-vector product y = Ax.
2511 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2512 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2513 . d_nz - number of nonzero blocks per block row in diagonal portion of local
2514 submatrix (same for all local rows)
2515 . d_nnz - array containing the number of nonzero blocks in the various block rows
2516 of the diagonal portion of the local submatrix (possibly different for each block
2517 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2518 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
2519 submatrix (same for all local rows).
2520 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
2521 off-diagonal portion of the local submatrix (possibly different for
2522 each block row) or PETSC_NULL.
2523
2524 Output Parameter:
2525 . A - the matrix
2526
2527 Options Database Keys:
2528 + -mat_block_size - size of the blocks to use
2529 - -mat_use_hash_table <fact> - use a hash table during assembly to save memory
2530
2531 Notes:
2532 If the *_nnz parameter is given then the *_nz parameter is ignored.
2533
2534 A nonzero block is any block that has 1 or more nonzeros in it.
2535
2536 The user MUST specify either the local or global matrix dimensions
2537 (possibly both).
2538
2539 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2540 then it must be used on all processors that share the object for that argument.
2541
2542 Storage Information:
2543 For a square global matrix we define each processor's diagonal portion
2544 to be its local rows and the corresponding columns (a square submatrix);
2545 each processor's off-diagonal portion encompasses the remainder of the
2546 local matrix (a rectangular submatrix).
2547 2548 The user can specify preallocated storage for the diagonal part of 2549 the local submatrix with either d_nz or d_nnz (not both). Set 2550 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2551 memory allocation. Likewise, specify preallocated storage for the 2552 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2553 2554 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2555 the figure below we depict these three local rows and all columns (0-11). 2556 2557 .vb 2558 0 1 2 3 4 5 6 7 8 9 10 11 2559 ------------------- 2560 row 3 | o o o d d d o o o o o o 2561 row 4 | o o o d d d o o o o o o 2562 row 5 | o o o d d d o o o o o o 2563 ------------------- 2564 .ve 2565 2566 Thus, any entries in the d locations are stored in the d (diagonal) 2567 submatrix, and any entries in the o locations are stored in the 2568 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2569 stored simply in the MATSEQBAIJ format for compressed row storage. 2570 2571 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2572 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2573 In general, for PDE problems in which most nonzeros are near the diagonal, 2574 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2575 or you will get TERRIBLE performance; see the users' manual chapter on 2576 matrices. 2577 2578 Level: intermediate 2579 2580 .keywords: matrix, block, aij, compressed row, sparse, parallel 2581 2582 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2583 @*/ 2584 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2585 { 2586 PetscErrorCode ierr; 2587 PetscMPIInt size; 2588 2589 PetscFunctionBegin; 2590 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2591 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2592 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2593 if (size > 1) { 2594 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2595 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2596 } else { 2597 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2598 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2599 } 2600 PetscFunctionReturn(0); 2601 } 2602 2603 #undef __FUNCT__ 2604 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2605 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2606 { 2607 Mat mat; 2608 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2609 PetscErrorCode ierr; 2610 PetscInt len=0; 2611 2612 PetscFunctionBegin; 2613 *newmat = 0; 2614 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2615 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2616 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2617 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2618 2619 mat->factor = matin->factor; 2620 mat->preallocated = PETSC_TRUE; 2621 mat->assembled = PETSC_TRUE; 2622 mat->insertmode = NOT_SET_VALUES; 2623 2624 a = (Mat_MPIBAIJ*)mat->data; 2625 mat->rmap.bs = matin->rmap.bs; 2626 a->bs2 = oldmat->bs2; 2627 a->mbs = oldmat->mbs; 2628 a->nbs = oldmat->nbs; 2629 a->Mbs = oldmat->Mbs; 2630 a->Nbs = oldmat->Nbs; 2631 2632 ierr = 
PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2633 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2634 2635 a->size = oldmat->size; 2636 a->rank = oldmat->rank; 2637 a->donotstash = oldmat->donotstash; 2638 a->roworiented = oldmat->roworiented; 2639 a->rowindices = 0; 2640 a->rowvalues = 0; 2641 a->getrowactive = PETSC_FALSE; 2642 a->barray = 0; 2643 a->rstartbs = oldmat->rstartbs; 2644 a->rendbs = oldmat->rendbs; 2645 a->cstartbs = oldmat->cstartbs; 2646 a->cendbs = oldmat->cendbs; 2647 2648 /* hash table stuff */ 2649 a->ht = 0; 2650 a->hd = 0; 2651 a->ht_size = 0; 2652 a->ht_flag = oldmat->ht_flag; 2653 a->ht_fact = oldmat->ht_fact; 2654 a->ht_total_ct = 0; 2655 a->ht_insert_ct = 0; 2656 2657 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2658 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2659 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2660 if (oldmat->colmap) { 2661 #if defined (PETSC_USE_CTABLE) 2662 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2663 #else 2664 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2665 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2666 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2667 #endif 2668 } else a->colmap = 0; 2669 2670 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2671 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2672 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2673 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2674 } else a->garray = 0; 2675 2676 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2677 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2678 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2679 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2680 2681 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2682 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2683 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2684 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2685 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2686 *newmat = mat; 2687 2688 PetscFunctionReturn(0); 2689 } 2690 2691 #include "petscsys.h" 2692 2693 #undef __FUNCT__ 2694 #define __FUNCT__ "MatLoad_MPIBAIJ" 2695 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2696 { 2697 Mat A; 2698 PetscErrorCode ierr; 2699 int fd; 2700 PetscInt i,nz,j,rstart,rend; 2701 PetscScalar *vals,*buf; 2702 MPI_Comm comm = ((PetscObject)viewer)->comm; 2703 MPI_Status status; 2704 PetscMPIInt rank,size,maxnz; 2705 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2706 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2707 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2708 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2709 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2710 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2711 2712 PetscFunctionBegin; 2713 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 2714 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2715 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2716 2717 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2718 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2719 if (!rank) { 2720 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2721 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2722 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2723 } 2724 2725 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2726 M = header[1]; N = header[2]; 2727 2728 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2729 2730 /* 2731 This code adds extra rows to make sure the number of rows is 2732 divisible by the blocksize 2733 */ 2734 Mbs = M/bs; 2735 extra_rows = bs - M + bs*Mbs; 2736 if (extra_rows == bs) extra_rows = 0; 2737 else Mbs++; 2738 if (extra_rows && !rank) { 2739 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2740 } 2741 2742 /* determine ownership of all rows */ 2743 mbs = Mbs/size + ((Mbs % size) > rank); 2744 m = mbs*bs; 2745 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2746 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2747 2748 /* process 0 needs enough room for process with most rows */ 2749 if (!rank) { 2750 mmax = rowners[1]; 2751 for (i=2; i<size; i++) { 2752 mmax = PetscMax(mmax,rowners[i]); 2753 } 2754 mmax*=bs; 2755 } else mmax = m; 2756 2757 rowners[0] = 0; 2758 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2759 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2760 rstart = rowners[rank]; 2761 rend = rowners[rank+1]; 2762 2763 /* distribute row lengths to all processors */ 2764 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2765 if (!rank) { 2766 mend = m; 2767 if (size == 1) mend = mend - extra_rows; 2768 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2769 for (j=mend; j<m; j++) locrowlens[j] = 1; 2770 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2771 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2772 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2773 for (j=0; j<m; j++) { 2774 procsnz[0] += locrowlens[j]; 2775 } 2776 for (i=1; i<size; i++) { 2777 mend = browners[i+1] - browners[i]; 2778 if (i == size-1) mend = mend - extra_rows; 2779 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2780 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2781 /* calculate the number of nonzeros on each processor */ 2782 for (j=0; j<browners[i+1]-browners[i]; j++) { 2783 procsnz[i] += rowlengths[j]; 2784 } 2785 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2786 } 2787 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2788 } else { 2789 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2790 } 2791 2792 if (!rank) { 2793 /* determine max buffer needed and allocate it */ 2794 maxnz = procsnz[0]; 2795 for (i=1; i<size; i++) { 2796 maxnz = PetscMax(maxnz,procsnz[i]); 2797 } 2798 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2799 2800 /* read in my part of the matrix column indices */ 2801 nz = procsnz[0]; 2802 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2803 mycols = ibuf; 2804 if (size == 1) nz -= extra_rows; 2805 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2806 if (size == 1) for 
(i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2807
2808 /* read in each remaining processor's part (except the last) and ship it off */
2809 for (i=1; i<size-1; i++) {
2810 nz = procsnz[i];
2811 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2812 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2813 }
2814 /* read in the stuff for the last proc */
2815 if (size != 1) {
2816 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2817 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2818 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2819 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2820 }
2821 ierr = PetscFree(cols);CHKERRQ(ierr);
2822 } else {
2823 /* determine buffer space needed for message */
2824 nz = 0;
2825 for (i=0; i<m; i++) {
2826 nz += locrowlens[i];
2827 }
2828 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2829 mycols = ibuf;
2830 /* receive message of column indices */
2831 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2832 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2833 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2834 }
2835
2836 /* loop over local rows, determining number of off diagonal entries */
2837 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2838 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2839 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2840 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2841 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2842 rowcount = 0; nzcount = 0;
2843 for (i=0; i<mbs; i++) {
2844 dcount = 0;
2845 odcount = 0;
2846 for (j=0; j<bs; j++) {
2847 kmax = locrowlens[rowcount];
2848 for (k=0; k<kmax; k++) {
2849 tmp = mycols[nzcount++]/bs;
2850 if (!mask[tmp]) {
2851 mask[tmp] = 1;
2852 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2853 else masked1[dcount++] = tmp;
2854 }
2855 }
2856 rowcount++;
2857 }
2858
2859 dlens[i] = dcount;
2860 odlens[i] = odcount;
2861
2862 /* zero out the mask elements we set */
2863 for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2864 for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2865 }
2866
2867 /* create our matrix */
2868 ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2869 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2870 ierr = MatSetType(A,type);CHKERRQ(ierr);
2871 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2872
2873 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2874 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2875
2876 if (!rank) {
2877 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2878 /* read in my part of the matrix numerical values */
2879 nz = procsnz[0];
2880 vals = buf;
2881 mycols = ibuf;
2882 if (size == 1) nz -= extra_rows;
2883 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2884 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2885
2886 /* insert into matrix */
2887 jj = rstart*bs;
2888 for (i=0; i<m; i++) {
2889 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2890 mycols += locrowlens[i];
2891 vals += locrowlens[i];
2892 jj++;
2893 }
2894 /* read in other processors (except the last one) and ship out */
2895 for (i=1; i<size-1; i++) {
2896 nz = procsnz[i];
2897 vals = buf;
2898 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2899 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2900 } 2901 /* the last proc */ 2902 if (size != 1){ 2903 nz = procsnz[i] - extra_rows; 2904 vals = buf; 2905 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2906 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2907 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2908 } 2909 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2910 } else { 2911 /* receive numeric values */ 2912 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2913 2914 /* receive message of values*/ 2915 vals = buf; 2916 mycols = ibuf; 2917 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2918 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2919 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2920 2921 /* insert into matrix */ 2922 jj = rstart*bs; 2923 for (i=0; i<m; i++) { 2924 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2925 mycols += locrowlens[i]; 2926 vals += locrowlens[i]; 2927 jj++; 2928 } 2929 } 2930 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2931 ierr = PetscFree(buf);CHKERRQ(ierr); 2932 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2933 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2934 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2935 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2936 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2937 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2938 2939 *newmat = A; 2940 PetscFunctionReturn(0); 2941 } 2942 2943 #undef __FUNCT__ 2944 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2945 /*@ 2946 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2947 2948 Input Parameters: 2949 . mat - the matrix 2950 . fact - factor 2951 2952 Collective on Mat 2953 2954 Level: advanced 2955 2956 Notes: 2957 This can also be set by the command line option: -mat_use_hash_table <fact> 2958 2959 .keywords: matrix, hashtable, factor, HT 2960 2961 .seealso: MatSetOption() 2962 @*/ 2963 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2964 { 2965 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2966 2967 PetscFunctionBegin; 2968 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2969 if (f) { 2970 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2971 } 2972 PetscFunctionReturn(0); 2973 } 2974 2975 EXTERN_C_BEGIN 2976 #undef __FUNCT__ 2977 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2978 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2979 { 2980 Mat_MPIBAIJ *baij; 2981 2982 PetscFunctionBegin; 2983 baij = (Mat_MPIBAIJ*)mat->data; 2984 baij->ht_fact = fact; 2985 PetscFunctionReturn(0); 2986 } 2987 EXTERN_C_END 2988 2989 #undef __FUNCT__ 2990 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2991 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2992 { 2993 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2994 PetscFunctionBegin; 2995 *Ad = a->A; 2996 *Ao = a->B; 2997 *colmap = a->garray; 2998 PetscFunctionReturn(0); 2999 } 3000
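/*
   Usage sketch for the hash-table assembly path defined above. This is an
   illustration only: the matrix A is assumed to have been created and typed
   as MATMPIBAIJ already, and the factor 1.6 is an arbitrary example value.

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);        turns on baij->ht_flag
      ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);       sets baij->ht_fact

   Equivalently, the option database key -mat_use_hash_table <fact> can be used,
   as handled in MatCreate_MPIBAIJ() above.
*/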