1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 39 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 PetscErrorCode ierr; 43 PetscInt i,*idxb = 0; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 if (idx) { 52 for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;} 53 } 54 55 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 56 if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 57 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 58 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 59 60 for (i=0; i<A->rmap.n; i++){ 61 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);} 62 } 63 64 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 65 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 66 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 67 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 68 PetscFunctionReturn(0); 69 } 70 71 EXTERN_C_BEGIN 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 74 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 75 { 76 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 77 PetscErrorCode ierr; 78 79 PetscFunctionBegin; 80 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 81 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 EXTERN_C_END 85 86 EXTERN_C_BEGIN 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 89 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 90 { 91 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 92 PetscErrorCode ierr; 93 94 PetscFunctionBegin; 95 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 96 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 EXTERN_C_END 100 101 /* 102 Local utility routine that creates a mapping from the global column 103 number to the local number in the off-diagonal part of the local 104 storage of the matrix. This is done in a non scable way since the 105 length of colmap equals the global matrix length. 106 */ 107 #undef __FUNCT__ 108 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 109 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 110 { 111 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 112 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 113 PetscErrorCode ierr; 114 PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 115 116 PetscFunctionBegin; 117 #if defined (PETSC_USE_CTABLE) 118 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 119 for (i=0; i<nbs; i++){ 120 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 121 } 122 #else 123 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 124 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 125 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 126 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 127 #endif 128 PetscFunctionReturn(0); 129 } 130 131 #define CHUNKSIZE 10 132 133 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 134 { \ 135 \ 136 brow = row/bs; \ 137 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 138 rmax = aimax[brow]; nrow = ailen[brow]; \ 139 bcol = col/bs; \ 140 ridx = row % bs; cidx = col % bs; \ 141 low = 0; high = nrow; \ 142 while (high-low > 3) { \ 143 t = (low+high)/2; \ 144 if (rp[t] > bcol) high = t; \ 145 else low = t; \ 146 } \ 147 for (_i=low; _i<high; _i++) { \ 148 if (rp[_i] > bcol) break; \ 149 if (rp[_i] == bcol) { \ 150 bap = ap + bs2*_i + bs*cidx + ridx; \ 151 if (addv == ADD_VALUES) *bap += value; \ 152 else *bap = value; \ 153 goto a_noinsert; \ 154 } \ 155 } \ 156 if (a->nonew == 1) goto a_noinsert; \ 157 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 158 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 159 N = nrow++ - 1; \ 160 /* shift up all the later entries in this row */ \ 161 for (ii=N; ii>=_i; ii--) { \ 162 rp[ii+1] = rp[ii]; \ 163 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 164 } \ 165 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 166 rp[_i] = bcol; \ 167 ap[bs2*_i + bs*cidx + ridx] = value; \ 168 a_noinsert:; \ 169 ailen[brow] = nrow; \ 170 } 171 172 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 173 { \ 174 brow = row/bs; \ 175 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 176 rmax = bimax[brow]; nrow = bilen[brow]; \ 177 bcol = col/bs; \ 178 ridx = row % bs; cidx = col % bs; \ 179 low = 0; high = nrow; \ 180 while (high-low > 3) { \ 181 t = (low+high)/2; \ 182 if (rp[t] > bcol) high = t; \ 183 else low = t; \ 184 } \ 185 for (_i=low; _i<high; _i++) { \ 186 if (rp[_i] > bcol) break; \ 187 if (rp[_i] == bcol) { \ 188 bap = ap + bs2*_i + bs*cidx + ridx; \ 189 if (addv == ADD_VALUES) *bap += value; \ 190 else *bap = value; \ 191 goto b_noinsert; \ 192 } \ 193 } \ 194 if (b->nonew == 1) goto b_noinsert; \ 195 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 196 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 197 CHKMEMQ;\ 198 N = nrow++ - 1; \ 199 /* shift up all the later entries in this row */ \ 200 for (ii=N; ii>=_i; ii--) { \ 201 rp[ii+1] = rp[ii]; \ 202 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 203 } \ 204 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 205 rp[_i] = bcol; \ 206 ap[bs2*_i + bs*cidx + ridx] = value; \ 207 b_noinsert:; \ 208 bilen[brow] = nrow; \ 209 } 210 211 #if defined(PETSC_USE_MAT_SINGLE) 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatSetValues_MPIBAIJ" 214 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 215 { 216 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 217 PetscErrorCode ierr; 218 PetscInt i,N = m*n; 219 MatScalar *vsingle; 220 221 PetscFunctionBegin; 222 if (N > b->setvalueslen) { 223 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 224 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 225 b->setvalueslen = N; 226 } 227 vsingle = b->setvaluescopy; 228 229 for (i=0; i<N; i++) { 230 vsingle[i] = v[i]; 231 } 232 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 238 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 239 { 240 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 241 PetscErrorCode ierr; 242 PetscInt i,N = m*n*b->bs2; 243 MatScalar *vsingle; 244 245 PetscFunctionBegin; 246 if (N > b->setvalueslen) { 247 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 248 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 249 b->setvalueslen = N; 250 } 251 vsingle = b->setvaluescopy; 252 for (i=0; i<N; i++) { 253 vsingle[i] = v[i]; 254 } 255 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 261 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 262 { 263 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 264 PetscErrorCode ierr; 265 PetscInt i,N = m*n; 266 MatScalar *vsingle; 267 268 PetscFunctionBegin; 269 if (N > b->setvalueslen) { 270 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 271 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 272 b->setvalueslen = N; 273 } 274 vsingle = b->setvaluescopy; 275 for (i=0; i<N; i++) { 276 vsingle[i] = v[i]; 277 } 278 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 284 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 285 { 286 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 287 PetscErrorCode ierr; 288 PetscInt i,N = m*n*b->bs2; 289 MatScalar *vsingle; 290 291 PetscFunctionBegin; 292 if (N > b->setvalueslen) { 293 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 294 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 295 b->setvalueslen = N; 296 } 297 vsingle = b->setvaluescopy; 298 for (i=0; i<N; i++) { 299 vsingle[i] = v[i]; 300 } 301 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 #endif 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 308 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 309 { 310 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 311 MatScalar value; 312 PetscTruth roworiented = baij->roworiented; 313 PetscErrorCode ierr; 314 PetscInt i,j,row,col; 315 PetscInt rstart_orig=mat->rmap.rstart; 316 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 317 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 318 319 /* Some Variables required in the macro */ 320 Mat A = baij->A; 321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 322 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 323 MatScalar *aa=a->a; 324 325 Mat B = baij->B; 326 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 327 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 328 MatScalar *ba=b->a; 329 330 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 331 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 332 MatScalar *ap,*bap; 333 334 PetscFunctionBegin; 335 for (i=0; i<m; i++) { 336 if (im[i] < 0) continue; 337 #if defined(PETSC_USE_DEBUG) 338 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 339 #endif 340 if (im[i] >= rstart_orig && im[i] < rend_orig) { 341 row = im[i] - rstart_orig; 342 for (j=0; j<n; j++) { 343 if (in[j] >= cstart_orig && in[j] < cend_orig){ 344 col = in[j] - cstart_orig; 345 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 346 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 347 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 348 } else if (in[j] < 0) continue; 349 #if defined(PETSC_USE_DEBUG) 350 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);} 351 #endif 352 else { 353 if (mat->was_assembled) { 354 if (!baij->colmap) { 355 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 356 } 357 #if defined (PETSC_USE_CTABLE) 358 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 359 col = col - 1; 360 #else 361 col = baij->colmap[in[j]/bs] - 1; 362 #endif 363 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 364 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 365 col = in[j]; 366 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 367 B = baij->B; 368 b = (Mat_SeqBAIJ*)(B)->data; 369 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 370 ba=b->a; 371 } else col += in[j]%bs; 372 } else col = in[j]; 373 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 374 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 375 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 376 } 377 } 378 } else { 379 if (!baij->donotstash) { 380 if (roworiented) { 381 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 382 } else { 383 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 384 } 385 } 386 } 387 } 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 393 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 394 { 395 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 396 const MatScalar *value; 397 MatScalar *barray=baij->barray; 398 PetscTruth roworiented = baij->roworiented; 399 PetscErrorCode ierr; 400 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 401 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 402 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 403 404 PetscFunctionBegin; 405 if(!barray) { 406 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 407 baij->barray = barray; 408 } 409 410 if (roworiented) { 411 stepval = (n-1)*bs; 412 } else { 413 stepval = (m-1)*bs; 414 } 415 for (i=0; i<m; i++) { 416 if (im[i] < 0) continue; 417 #if defined(PETSC_USE_DEBUG) 418 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 419 #endif 420 if (im[i] >= rstart && im[i] < rend) { 421 row = im[i] - rstart; 422 for (j=0; j<n; j++) { 423 /* If NumCol = 1 then a copy is not required */ 424 if ((roworiented) && (n == 1)) { 425 barray = (MatScalar*)v + i*bs2; 426 } else if((!roworiented) && (m == 1)) { 427 barray = (MatScalar*)v + j*bs2; 428 } else { /* Here a copy is required */ 429 if (roworiented) { 430 value = v + i*(stepval+bs)*bs + j*bs; 431 } else { 432 value = v + j*(stepval+bs)*bs + i*bs; 433 } 434 for (ii=0; ii<bs; ii++,value+=stepval) { 435 for (jj=0; jj<bs; jj++) { 436 *barray++ = *value++; 437 } 438 } 439 barray -=bs2; 440 } 441 442 if (in[j] >= cstart && in[j] < cend){ 443 col = in[j] - cstart; 444 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 445 } 446 else if (in[j] < 0) continue; 447 #if defined(PETSC_USE_DEBUG) 448 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 449 #endif 450 else { 451 if (mat->was_assembled) { 452 if (!baij->colmap) { 453 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 454 } 455 456 #if defined(PETSC_USE_DEBUG) 457 #if defined (PETSC_USE_CTABLE) 458 { PetscInt data; 459 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 460 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461 } 462 #else 463 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 464 #endif 465 #endif 466 #if defined (PETSC_USE_CTABLE) 467 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 468 col = (col - 1)/bs; 469 #else 470 col = (baij->colmap[in[j]] - 1)/bs; 471 #endif 472 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 473 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 474 col = in[j]; 475 } 476 } 477 else col = in[j]; 478 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 479 } 480 } 481 } else { 482 if (!baij->donotstash) { 483 if (roworiented) { 484 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 485 } else { 486 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 487 } 488 } 489 } 490 } 491 PetscFunctionReturn(0); 492 } 493 494 #define HASH_KEY 0.6180339887 495 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 496 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 497 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 500 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 501 { 502 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 503 PetscTruth roworiented = baij->roworiented; 504 PetscErrorCode ierr; 505 PetscInt i,j,row,col; 506 PetscInt rstart_orig=mat->rmap.rstart; 507 PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 508 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 509 PetscReal tmp; 510 MatScalar **HD = baij->hd,value; 511 #if defined(PETSC_USE_DEBUG) 512 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 513 #endif 514 515 PetscFunctionBegin; 516 517 for (i=0; i<m; i++) { 518 #if defined(PETSC_USE_DEBUG) 519 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 520 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 521 #endif 522 row = im[i]; 523 if (row >= rstart_orig && row < rend_orig) { 524 for (j=0; j<n; j++) { 525 col = in[j]; 526 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 527 /* Look up PetscInto the Hash Table */ 528 key = (row/bs)*Nbs+(col/bs)+1; 529 h1 = HASH(size,key,tmp); 530 531 532 idx = h1; 533 #if defined(PETSC_USE_DEBUG) 534 insert_ct++; 535 total_ct++; 536 if (HT[idx] != key) { 537 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 538 if (idx == size) { 539 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 540 if (idx == h1) { 541 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 542 } 543 } 544 } 545 #else 546 if (HT[idx] != key) { 547 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 548 if (idx == size) { 549 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 550 if (idx == h1) { 551 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 552 } 553 } 554 } 555 #endif 556 /* A HASH table entry is found, so insert the values at the correct address */ 557 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 558 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 579 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscTruth roworiented = baij->roworiented; 583 PetscErrorCode ierr; 584 PetscInt i,j,ii,jj,row,col; 585 PetscInt rstart=baij->rstartbs; 586 PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2; 587 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 588 PetscReal tmp; 589 MatScalar **HD = baij->hd,*baij_a; 590 const MatScalar *v_t,*value; 591 #if defined(PETSC_USE_DEBUG) 592 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 593 #endif 594 595 PetscFunctionBegin; 596 597 if (roworiented) { 598 stepval = (n-1)*bs; 599 } else { 600 stepval = (m-1)*bs; 601 } 602 for (i=0; i<m; i++) { 603 #if defined(PETSC_USE_DEBUG) 604 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 605 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 606 #endif 607 row = im[i]; 608 v_t = v + i*bs2; 609 if (row >= rstart && row < rend) { 610 for (j=0; j<n; j++) { 611 col = in[j]; 612 613 /* Look up into the Hash Table */ 614 key = row*Nbs+col+1; 615 h1 = HASH(size,key,tmp); 616 617 idx = h1; 618 #if defined(PETSC_USE_DEBUG) 619 total_ct++; 620 insert_ct++; 621 if (HT[idx] != key) { 622 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 623 if (idx == size) { 624 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 625 if (idx == h1) { 626 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 627 } 628 } 629 } 630 #else 631 if (HT[idx] != key) { 632 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 633 if (idx == size) { 634 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 635 if (idx == h1) { 636 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 637 } 638 } 639 } 640 #endif 641 baij_a = HD[idx]; 642 if (roworiented) { 643 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 644 /* value = v + (i*(stepval+bs)+j)*bs; */ 645 value = v_t; 646 v_t += bs; 647 if (addv == ADD_VALUES) { 648 for (ii=0; ii<bs; ii++,value+=stepval) { 649 for (jj=ii; jj<bs2; jj+=bs) { 650 baij_a[jj] += *value++; 651 } 652 } 653 } else { 654 for (ii=0; ii<bs; ii++,value+=stepval) { 655 for (jj=ii; jj<bs2; jj+=bs) { 656 baij_a[jj] = *value++; 657 } 658 } 659 } 660 } else { 661 value = v + j*(stepval+bs)*bs + i*bs; 662 if (addv == ADD_VALUES) { 663 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 664 for (jj=0; jj<bs; jj++) { 665 baij_a[jj] += *value++; 666 } 667 } 668 } else { 669 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 670 for (jj=0; jj<bs; jj++) { 671 baij_a[jj] = *value++; 672 } 673 } 674 } 675 } 676 } 677 } else { 678 if (!baij->donotstash) { 679 if (roworiented) { 680 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 681 } else { 682 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 683 } 684 } 685 } 686 } 687 #if defined(PETSC_USE_DEBUG) 688 baij->ht_total_ct = total_ct; 689 baij->ht_insert_ct = insert_ct; 690 #endif 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetValues_MPIBAIJ" 696 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 697 { 698 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 699 PetscErrorCode ierr; 700 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 701 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 702 703 PetscFunctionBegin; 704 for (i=0; i<m; i++) { 705 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 706 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 707 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 708 row = idxm[i] - bsrstart; 709 for (j=0; j<n; j++) { 710 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 711 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 712 if (idxn[j] >= bscstart && idxn[j] < bscend){ 713 col = idxn[j] - bscstart; 714 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 715 } else { 716 if (!baij->colmap) { 717 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 718 } 719 #if defined (PETSC_USE_CTABLE) 720 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 721 data --; 722 #else 723 data = baij->colmap[idxn[j]/bs]-1; 724 #endif 725 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 726 else { 727 col = data + idxn[j]%bs; 728 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 729 } 730 } 731 } 732 } else { 733 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 734 } 735 } 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatNorm_MPIBAIJ" 741 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 742 { 743 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 744 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 745 PetscErrorCode ierr; 746 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 747 PetscReal sum = 0.0; 748 MatScalar *v; 749 750 PetscFunctionBegin; 751 if (baij->size == 1) { 752 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 753 } else { 754 if (type == NORM_FROBENIUS) { 755 v = amat->a; 756 nz = amat->nz*bs2; 757 for (i=0; i<nz; i++) { 758 #if defined(PETSC_USE_COMPLEX) 759 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 760 #else 761 sum += (*v)*(*v); v++; 762 #endif 763 } 764 v = bmat->a; 765 nz = bmat->nz*bs2; 766 for (i=0; i<nz; i++) { 767 #if defined(PETSC_USE_COMPLEX) 768 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 769 #else 770 sum += (*v)*(*v); v++; 771 #endif 772 } 773 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 774 *nrm = sqrt(*nrm); 775 } else if (type == NORM_1) { /* max column sum */ 776 PetscReal *tmp,*tmp2; 777 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 778 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 779 tmp2 = tmp + mat->cmap.N; 780 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 781 v = amat->a; jj = amat->j; 782 for (i=0; i<amat->nz; i++) { 783 for (j=0; j<bs; j++){ 784 col = bs*(cstart + *jj) + j; /* column index */ 785 for (row=0; row<bs; row++){ 786 tmp[col] += PetscAbsScalar(*v); v++; 787 } 788 } 789 jj++; 790 } 791 v = bmat->a; jj = bmat->j; 792 for (i=0; i<bmat->nz; i++) { 793 for (j=0; j<bs; j++){ 794 col = bs*garray[*jj] + j; 795 for (row=0; row<bs; row++){ 796 tmp[col] += PetscAbsScalar(*v); v++; 797 } 798 } 799 jj++; 800 } 801 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 802 *nrm = 0.0; 803 for (j=0; j<mat->cmap.N; j++) { 804 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 805 } 806 ierr = PetscFree(tmp);CHKERRQ(ierr); 807 } else if (type == NORM_INFINITY) { /* max row sum */ 808 PetscReal *sums; 809 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 810 sum = 0.0; 811 for (j=0; j<amat->mbs; j++) { 812 for (row=0; row<bs; row++) sums[row] = 0.0; 813 v = amat->a + bs2*amat->i[j]; 814 nz = amat->i[j+1]-amat->i[j]; 815 for (i=0; i<nz; i++) { 816 for (col=0; col<bs; col++){ 817 for (row=0; row<bs; row++){ 818 sums[row] += PetscAbsScalar(*v); v++; 819 } 820 } 821 } 822 v = bmat->a + bs2*bmat->i[j]; 823 nz = bmat->i[j+1]-bmat->i[j]; 824 for (i=0; i<nz; i++) { 825 for (col=0; col<bs; col++){ 826 for (row=0; row<bs; row++){ 827 sums[row] += PetscAbsScalar(*v); v++; 828 } 829 } 830 } 831 for (row=0; row<bs; row++){ 832 if (sums[row] > sum) sum = sums[row]; 833 } 834 } 835 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 836 ierr = PetscFree(sums);CHKERRQ(ierr); 837 } else { 838 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 839 } 840 } 841 PetscFunctionReturn(0); 842 } 843 844 /* 845 Creates the hash table, and sets the table 846 This table is created only once. 847 If new entried need to be added to the matrix 848 then the hash table has to be destroyed and 849 recreated. 850 */ 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 853 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 854 { 855 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 856 Mat A = baij->A,B=baij->B; 857 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 858 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 859 PetscErrorCode ierr; 860 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 861 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 862 PetscInt *HT,key; 863 MatScalar **HD; 864 PetscReal tmp; 865 #if defined(PETSC_USE_INFO) 866 PetscInt ct=0,max=0; 867 #endif 868 869 PetscFunctionBegin; 870 baij->ht_size=(PetscInt)(factor*nz); 871 size = baij->ht_size; 872 873 if (baij->ht) { 874 PetscFunctionReturn(0); 875 } 876 877 /* Allocate Memory for Hash Table */ 878 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 879 baij->ht = (PetscInt*)(baij->hd + size); 880 HD = baij->hd; 881 HT = baij->ht; 882 883 884 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 885 886 887 /* Loop Over A */ 888 for (i=0; i<a->mbs; i++) { 889 for (j=ai[i]; j<ai[i+1]; j++) { 890 row = i+rstart; 891 col = aj[j]+cstart; 892 893 key = row*Nbs + col + 1; 894 h1 = HASH(size,key,tmp); 895 for (k=0; k<size; k++){ 896 if (!HT[(h1+k)%size]) { 897 HT[(h1+k)%size] = key; 898 HD[(h1+k)%size] = a->a + j*bs2; 899 break; 900 #if defined(PETSC_USE_INFO) 901 } else { 902 ct++; 903 #endif 904 } 905 } 906 #if defined(PETSC_USE_INFO) 907 if (k> max) max = k; 908 #endif 909 } 910 } 911 /* Loop Over B */ 912 for (i=0; i<b->mbs; i++) { 913 for (j=bi[i]; j<bi[i+1]; j++) { 914 row = i+rstart; 915 col = garray[bj[j]]; 916 key = row*Nbs + col + 1; 917 h1 = HASH(size,key,tmp); 918 for (k=0; k<size; k++){ 919 if (!HT[(h1+k)%size]) { 920 HT[(h1+k)%size] = key; 921 HD[(h1+k)%size] = b->a + j*bs2; 922 break; 923 #if defined(PETSC_USE_INFO) 924 } else { 925 ct++; 926 #endif 927 } 928 } 929 #if defined(PETSC_USE_INFO) 930 if (k> max) max = k; 931 #endif 932 } 933 } 934 935 /* Print Summary */ 936 #if defined(PETSC_USE_INFO) 937 for (i=0,j=0; i<size; i++) { 938 if (HT[i]) {j++;} 939 } 940 ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 941 #endif 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 947 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 948 { 949 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 950 PetscErrorCode ierr; 951 PetscInt nstash,reallocs; 952 InsertMode addv; 953 954 PetscFunctionBegin; 955 if (baij->donotstash) { 956 PetscFunctionReturn(0); 957 } 958 959 /* make sure all processors are either in INSERTMODE or ADDMODE */ 960 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 961 if (addv == (ADD_VALUES|INSERT_VALUES)) { 962 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 963 } 964 mat->insertmode = addv; /* in case this processor had no cache */ 965 966 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 967 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 968 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 969 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 970 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 971 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 977 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 978 { 979 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 980 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 981 PetscErrorCode ierr; 982 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 983 PetscInt *row,*col,other_disassembled; 984 PetscTruth r1,r2,r3; 985 MatScalar *val; 986 InsertMode addv = mat->insertmode; 987 PetscMPIInt n; 988 989 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 990 PetscFunctionBegin; 991 if (!baij->donotstash) { 992 while (1) { 993 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 994 if (!flg) break; 995 996 for (i=0; i<n;) { 997 /* Now identify the consecutive vals belonging to the same row */ 998 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 999 if (j < n) ncols = j-i; 1000 else ncols = n-i; 1001 /* Now assemble all these values with a single function call */ 1002 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1003 i = j; 1004 } 1005 } 1006 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1007 /* Now process the block-stash. Since the values are stashed column-oriented, 1008 set the roworiented flag to column oriented, and after MatSetValues() 1009 restore the original flags */ 1010 r1 = baij->roworiented; 1011 r2 = a->roworiented; 1012 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 1013 baij->roworiented = PETSC_FALSE; 1014 a->roworiented = PETSC_FALSE; 1015 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 1016 while (1) { 1017 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1018 if (!flg) break; 1019 1020 for (i=0; i<n;) { 1021 /* Now identify the consecutive vals belonging to the same row */ 1022 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1023 if (j < n) ncols = j-i; 1024 else ncols = n-i; 1025 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1026 i = j; 1027 } 1028 } 1029 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1030 baij->roworiented = r1; 1031 a->roworiented = r2; 1032 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 1033 } 1034 1035 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1036 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1037 1038 /* determine if any processor has disassembled, if so we must 1039 also disassemble ourselfs, in order that we may reassemble. */ 1040 /* 1041 if nonzero structure of submatrix B cannot change then we know that 1042 no processor disassembled thus we can skip this stuff 1043 */ 1044 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1045 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1046 if (mat->was_assembled && !other_disassembled) { 1047 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1048 } 1049 } 1050 1051 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1052 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1053 } 1054 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 1055 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1056 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1057 1058 #if defined(PETSC_USE_INFO) 1059 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1060 ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 1061 baij->ht_total_ct = 0; 1062 baij->ht_insert_ct = 0; 1063 } 1064 #endif 1065 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1066 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1067 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1068 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1069 } 1070 1071 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1072 baij->rowvalues = 0; 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1078 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1079 { 1080 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1081 PetscErrorCode ierr; 1082 PetscMPIInt size = baij->size,rank = baij->rank; 1083 PetscInt bs = mat->rmap.bs; 1084 PetscTruth iascii,isdraw; 1085 PetscViewer sviewer; 1086 PetscViewerFormat format; 1087 1088 PetscFunctionBegin; 1089 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1090 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1091 if (iascii) { 1092 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1093 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1094 MatInfo info; 1095 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1096 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1097 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1098 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1099 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1100 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1101 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1102 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1103 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1104 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1105 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1108 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1111 PetscFunctionReturn(0); 1112 } 1113 } 1114 1115 if (isdraw) { 1116 PetscDraw draw; 1117 PetscTruth isnull; 1118 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1119 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1120 } 1121 1122 if (size == 1) { 1123 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1124 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1125 } else { 1126 /* assemble the entire matrix onto first processor. */ 1127 Mat A; 1128 Mat_SeqBAIJ *Aloc; 1129 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1130 MatScalar *a; 1131 1132 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1133 /* Perhaps this should be the type of mat? */ 1134 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 1135 if (!rank) { 1136 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1137 } else { 1138 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1139 } 1140 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1141 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1142 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1143 1144 /* copy over the A part */ 1145 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1146 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1147 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1148 1149 for (i=0; i<mbs; i++) { 1150 rvals[0] = bs*(baij->rstartbs + i); 1151 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1152 for (j=ai[i]; j<ai[i+1]; j++) { 1153 col = (baij->cstartbs+aj[j])*bs; 1154 for (k=0; k<bs; k++) { 1155 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1156 col++; a += bs; 1157 } 1158 } 1159 } 1160 /* copy over the B part */ 1161 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1162 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1163 for (i=0; i<mbs; i++) { 1164 rvals[0] = bs*(baij->rstartbs + i); 1165 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1166 for (j=ai[i]; j<ai[i+1]; j++) { 1167 col = baij->garray[aj[j]]*bs; 1168 for (k=0; k<bs; k++) { 1169 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1170 col++; a += bs; 1171 } 1172 } 1173 } 1174 ierr = PetscFree(rvals);CHKERRQ(ierr); 1175 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1176 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1177 /* 1178 Everyone has to call to draw the matrix since the graphics waits are 1179 synchronized across all processors that share the PetscDraw object 1180 */ 1181 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1182 if (!rank) { 1183 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1184 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1185 } 1186 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1187 ierr = MatDestroy(A);CHKERRQ(ierr); 1188 } 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatView_MPIBAIJ" 1194 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1195 { 1196 PetscErrorCode ierr; 1197 PetscTruth iascii,isdraw,issocket,isbinary; 1198 1199 PetscFunctionBegin; 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1202 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1203 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1204 if (iascii || isdraw || issocket || isbinary) { 1205 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1206 } else { 1207 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1214 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1215 { 1216 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1217 PetscErrorCode ierr; 1218 1219 PetscFunctionBegin; 1220 #if defined(PETSC_USE_LOG) 1221 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 1222 #endif 1223 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1224 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1225 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1226 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1227 #if defined (PETSC_USE_CTABLE) 1228 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1229 #else 1230 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1231 #endif 1232 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1233 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1234 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1235 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1236 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1237 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1238 #if defined(PETSC_USE_MAT_SINGLE) 1239 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1240 #endif 1241 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1242 ierr = PetscFree(baij);CHKERRQ(ierr); 1243 1244 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1250 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatMult_MPIBAIJ" 1257 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1258 { 1259 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 PetscInt nt; 1262 1263 PetscFunctionBegin; 1264 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1265 if (nt != A->cmap.n) { 1266 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1267 } 1268 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1269 if (nt != A->rmap.n) { 1270 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1271 } 1272 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1273 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1274 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1275 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1281 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1282 { 1283 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1288 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1289 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1290 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #undef __FUNCT__ 1295 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1296 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1297 { 1298 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1299 PetscErrorCode ierr; 1300 PetscTruth merged; 1301 1302 PetscFunctionBegin; 1303 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1304 /* do nondiagonal part */ 1305 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1306 if (!merged) { 1307 /* send it on its way */ 1308 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1309 /* do local part */ 1310 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1311 /* receive remote parts: note this assumes the values are not actually */ 1312 /* inserted in yy until the next line */ 1313 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1314 } else { 1315 /* do local part */ 1316 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1317 /* send it on its way */ 1318 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1319 /* values actually were received in the Begin() but we need to call this nop */ 1320 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1327 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1328 { 1329 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 /* do nondiagonal part */ 1334 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1335 /* send it on its way */ 1336 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1337 /* do local part */ 1338 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1339 /* receive remote parts: note this assumes the values are not actually */ 1340 /* inserted in yy until the next line, which is true for my implementation*/ 1341 /* but is not perhaps always true. */ 1342 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 /* 1347 This only works correctly for square matrices where the subblock A->A is the 1348 diagonal block 1349 */ 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1352 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1353 { 1354 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1355 PetscErrorCode ierr; 1356 1357 PetscFunctionBegin; 1358 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1359 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatScale_MPIBAIJ" 1365 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1366 { 1367 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1372 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1378 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1379 { 1380 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1381 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1382 PetscErrorCode ierr; 1383 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1384 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1385 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1386 1387 PetscFunctionBegin; 1388 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1389 mat->getrowactive = PETSC_TRUE; 1390 1391 if (!mat->rowvalues && (idx || v)) { 1392 /* 1393 allocate enough space to hold information from the longest row. 1394 */ 1395 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1396 PetscInt max = 1,mbs = mat->mbs,tmp; 1397 for (i=0; i<mbs; i++) { 1398 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1399 if (max < tmp) { max = tmp; } 1400 } 1401 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1402 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1403 } 1404 1405 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1406 lrow = row - brstart; 1407 1408 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1409 if (!v) {pvA = 0; pvB = 0;} 1410 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1411 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1412 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1413 nztot = nzA + nzB; 1414 1415 cmap = mat->garray; 1416 if (v || idx) { 1417 if (nztot) { 1418 /* Sort by increasing column numbers, assuming A and B already sorted */ 1419 PetscInt imark = -1; 1420 if (v) { 1421 *v = v_p = mat->rowvalues; 1422 for (i=0; i<nzB; i++) { 1423 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1424 else break; 1425 } 1426 imark = i; 1427 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1428 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1429 } 1430 if (idx) { 1431 *idx = idx_p = mat->rowindices; 1432 if (imark > -1) { 1433 for (i=0; i<imark; i++) { 1434 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1435 } 1436 } else { 1437 for (i=0; i<nzB; i++) { 1438 if (cmap[cworkB[i]/bs] < cstart) 1439 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1440 else break; 1441 } 1442 imark = i; 1443 } 1444 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1445 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1446 } 1447 } else { 1448 if (idx) *idx = 0; 1449 if (v) *v = 0; 1450 } 1451 } 1452 *nz = nztot; 1453 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1454 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1460 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1461 { 1462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1463 1464 PetscFunctionBegin; 1465 if (!baij->getrowactive) { 1466 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1467 } 1468 baij->getrowactive = PETSC_FALSE; 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1474 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1475 { 1476 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1477 PetscErrorCode ierr; 1478 1479 PetscFunctionBegin; 1480 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1481 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1487 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1488 { 1489 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1490 Mat A = a->A,B = a->B; 1491 PetscErrorCode ierr; 1492 PetscReal isend[5],irecv[5]; 1493 1494 PetscFunctionBegin; 1495 info->block_size = (PetscReal)matin->rmap.bs; 1496 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1497 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1498 isend[3] = info->memory; isend[4] = info->mallocs; 1499 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1500 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1501 isend[3] += info->memory; isend[4] += info->mallocs; 1502 if (flag == MAT_LOCAL) { 1503 info->nz_used = isend[0]; 1504 info->nz_allocated = isend[1]; 1505 info->nz_unneeded = isend[2]; 1506 info->memory = isend[3]; 1507 info->mallocs = isend[4]; 1508 } else if (flag == MAT_GLOBAL_MAX) { 1509 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1510 info->nz_used = irecv[0]; 1511 info->nz_allocated = irecv[1]; 1512 info->nz_unneeded = irecv[2]; 1513 info->memory = irecv[3]; 1514 info->mallocs = irecv[4]; 1515 } else if (flag == MAT_GLOBAL_SUM) { 1516 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1517 info->nz_used = irecv[0]; 1518 info->nz_allocated = irecv[1]; 1519 info->nz_unneeded = irecv[2]; 1520 info->memory = irecv[3]; 1521 info->mallocs = irecv[4]; 1522 } else { 1523 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1524 } 1525 info->rows_global = (PetscReal)A->rmap.N; 1526 info->columns_global = (PetscReal)A->cmap.N; 1527 info->rows_local = (PetscReal)A->rmap.N; 1528 info->columns_local = (PetscReal)A->cmap.N; 1529 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1530 info->fill_ratio_needed = 0; 1531 info->factor_mallocs = 0; 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1537 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1538 { 1539 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 switch (op) { 1544 case MAT_NO_NEW_NONZERO_LOCATIONS: 1545 case MAT_YES_NEW_NONZERO_LOCATIONS: 1546 case MAT_COLUMNS_UNSORTED: 1547 case MAT_COLUMNS_SORTED: 1548 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1549 case MAT_KEEP_ZEROED_ROWS: 1550 case MAT_NEW_NONZERO_LOCATION_ERR: 1551 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1552 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1553 break; 1554 case MAT_ROW_ORIENTED: 1555 a->roworiented = PETSC_TRUE; 1556 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1557 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1558 break; 1559 case MAT_ROWS_SORTED: 1560 case MAT_ROWS_UNSORTED: 1561 case MAT_YES_NEW_DIAGONALS: 1562 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1563 break; 1564 case MAT_COLUMN_ORIENTED: 1565 a->roworiented = PETSC_FALSE; 1566 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1567 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1568 break; 1569 case MAT_IGNORE_OFF_PROC_ENTRIES: 1570 a->donotstash = PETSC_TRUE; 1571 break; 1572 case MAT_NO_NEW_DIAGONALS: 1573 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1574 case MAT_USE_HASH_TABLE: 1575 a->ht_flag = PETSC_TRUE; 1576 break; 1577 case MAT_SYMMETRIC: 1578 case MAT_STRUCTURALLY_SYMMETRIC: 1579 case MAT_HERMITIAN: 1580 case MAT_SYMMETRY_ETERNAL: 1581 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1582 break; 1583 case MAT_NOT_SYMMETRIC: 1584 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1585 case MAT_NOT_HERMITIAN: 1586 case MAT_NOT_SYMMETRY_ETERNAL: 1587 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1588 break; 1589 default: 1590 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1591 } 1592 PetscFunctionReturn(0); 1593 } 1594 1595 #undef __FUNCT__ 1596 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1597 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1598 { 1599 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1600 Mat_SeqBAIJ *Aloc; 1601 Mat B; 1602 PetscErrorCode ierr; 1603 PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1604 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1605 MatScalar *a; 1606 1607 PetscFunctionBegin; 1608 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1609 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1610 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1611 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1612 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1613 1614 /* copy over the A part */ 1615 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1616 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1617 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1618 1619 for (i=0; i<mbs; i++) { 1620 rvals[0] = bs*(baij->rstartbs + i); 1621 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1622 for (j=ai[i]; j<ai[i+1]; j++) { 1623 col = (baij->cstartbs+aj[j])*bs; 1624 for (k=0; k<bs; k++) { 1625 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1626 col++; a += bs; 1627 } 1628 } 1629 } 1630 /* copy over the B part */ 1631 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1632 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1633 for (i=0; i<mbs; i++) { 1634 rvals[0] = bs*(baij->rstartbs + i); 1635 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1636 for (j=ai[i]; j<ai[i+1]; j++) { 1637 col = baij->garray[aj[j]]*bs; 1638 for (k=0; k<bs; k++) { 1639 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1640 col++; a += bs; 1641 } 1642 } 1643 } 1644 ierr = PetscFree(rvals);CHKERRQ(ierr); 1645 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1646 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1647 1648 if (matout) { 1649 *matout = B; 1650 } else { 1651 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1652 } 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1658 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1659 { 1660 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1661 Mat a = baij->A,b = baij->B; 1662 PetscErrorCode ierr; 1663 PetscInt s1,s2,s3; 1664 1665 PetscFunctionBegin; 1666 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1667 if (rr) { 1668 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1669 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1670 /* Overlap communication with computation. */ 1671 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1672 } 1673 if (ll) { 1674 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1675 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1676 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1677 } 1678 /* scale the diagonal block */ 1679 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1680 1681 if (rr) { 1682 /* Do a scatter end and then right scale the off-diagonal block */ 1683 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1684 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1685 } 1686 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1692 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1693 { 1694 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1695 PetscErrorCode ierr; 1696 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1697 PetscInt i,*owners = A->rmap.range; 1698 PetscInt *nprocs,j,idx,nsends,row; 1699 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1700 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1701 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1702 MPI_Comm comm = A->comm; 1703 MPI_Request *send_waits,*recv_waits; 1704 MPI_Status recv_status,*send_status; 1705 #if defined(PETSC_DEBUG) 1706 PetscTruth found = PETSC_FALSE; 1707 #endif 1708 1709 PetscFunctionBegin; 1710 /* first count number of contributors to each processor */ 1711 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1712 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1713 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1714 j = 0; 1715 for (i=0; i<N; i++) { 1716 if (lastidx > (idx = rows[i])) j = 0; 1717 lastidx = idx; 1718 for (; j<size; j++) { 1719 if (idx >= owners[j] && idx < owners[j+1]) { 1720 nprocs[2*j]++; 1721 nprocs[2*j+1] = 1; 1722 owner[i] = j; 1723 #if defined(PETSC_DEBUG) 1724 found = PETSC_TRUE; 1725 #endif 1726 break; 1727 } 1728 } 1729 #if defined(PETSC_DEBUG) 1730 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1731 found = PETSC_FALSE; 1732 #endif 1733 } 1734 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1735 1736 /* inform other processors of number of messages and max length*/ 1737 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1738 1739 /* post receives: */ 1740 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1741 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1742 for (i=0; i<nrecvs; i++) { 1743 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1744 } 1745 1746 /* do sends: 1747 1) starts[i] gives the starting index in svalues for stuff going to 1748 the ith processor 1749 */ 1750 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1751 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1752 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1753 starts[0] = 0; 1754 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1755 for (i=0; i<N; i++) { 1756 svalues[starts[owner[i]]++] = rows[i]; 1757 } 1758 1759 starts[0] = 0; 1760 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1761 count = 0; 1762 for (i=0; i<size; i++) { 1763 if (nprocs[2*i+1]) { 1764 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1765 } 1766 } 1767 ierr = PetscFree(starts);CHKERRQ(ierr); 1768 1769 base = owners[rank]; 1770 1771 /* wait on receives */ 1772 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1773 source = lens + nrecvs; 1774 count = nrecvs; slen = 0; 1775 while (count) { 1776 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1777 /* unpack receives into our local space */ 1778 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1779 source[imdex] = recv_status.MPI_SOURCE; 1780 lens[imdex] = n; 1781 slen += n; 1782 count--; 1783 } 1784 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1785 1786 /* move the data into the send scatter */ 1787 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1788 count = 0; 1789 for (i=0; i<nrecvs; i++) { 1790 values = rvalues + i*nmax; 1791 for (j=0; j<lens[i]; j++) { 1792 lrows[count++] = values[j] - base; 1793 } 1794 } 1795 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1796 ierr = PetscFree(lens);CHKERRQ(ierr); 1797 ierr = PetscFree(owner);CHKERRQ(ierr); 1798 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1799 1800 /* actually zap the local rows */ 1801 /* 1802 Zero the required rows. If the "diagonal block" of the matrix 1803 is square and the user wishes to set the diagonal we use separate 1804 code so that MatSetValues() is not called for each diagonal allocating 1805 new memory, thus calling lots of mallocs and slowing things down. 1806 1807 Contributed by: Matthew Knepley 1808 */ 1809 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1810 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1811 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1812 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1813 } else if (diag != 0.0) { 1814 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1815 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1816 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1817 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1818 } 1819 for (i=0; i<slen; i++) { 1820 row = lrows[i] + rstart_bs; 1821 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1822 } 1823 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1824 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1825 } else { 1826 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1827 } 1828 1829 ierr = PetscFree(lrows);CHKERRQ(ierr); 1830 1831 /* wait on sends */ 1832 if (nsends) { 1833 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1834 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1835 ierr = PetscFree(send_status);CHKERRQ(ierr); 1836 } 1837 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1838 ierr = PetscFree(svalues);CHKERRQ(ierr); 1839 1840 PetscFunctionReturn(0); 1841 } 1842 1843 #undef __FUNCT__ 1844 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1845 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1846 { 1847 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1848 PetscErrorCode ierr; 1849 1850 PetscFunctionBegin; 1851 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1852 PetscFunctionReturn(0); 1853 } 1854 1855 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatEqual_MPIBAIJ" 1859 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1860 { 1861 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1862 Mat a,b,c,d; 1863 PetscTruth flg; 1864 PetscErrorCode ierr; 1865 1866 PetscFunctionBegin; 1867 a = matA->A; b = matA->B; 1868 c = matB->A; d = matB->B; 1869 1870 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1871 if (flg) { 1872 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1873 } 1874 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatCopy_MPIBAIJ" 1880 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1881 { 1882 PetscErrorCode ierr; 1883 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1884 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1885 1886 PetscFunctionBegin; 1887 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1888 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1889 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1890 } else { 1891 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1892 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1893 } 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1899 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1900 { 1901 PetscErrorCode ierr; 1902 1903 PetscFunctionBegin; 1904 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #include "petscblaslapack.h" 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1911 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1912 { 1913 PetscErrorCode ierr; 1914 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1915 PetscBLASInt bnz,one=1; 1916 Mat_SeqBAIJ *x,*y; 1917 1918 PetscFunctionBegin; 1919 if (str == SAME_NONZERO_PATTERN) { 1920 PetscScalar alpha = a; 1921 x = (Mat_SeqBAIJ *)xx->A->data; 1922 y = (Mat_SeqBAIJ *)yy->A->data; 1923 bnz = (PetscBLASInt)x->nz; 1924 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1925 x = (Mat_SeqBAIJ *)xx->B->data; 1926 y = (Mat_SeqBAIJ *)yy->B->data; 1927 bnz = (PetscBLASInt)x->nz; 1928 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1929 } else { 1930 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1937 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1938 { 1939 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1940 PetscErrorCode ierr; 1941 1942 PetscFunctionBegin; 1943 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1944 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1950 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1951 { 1952 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1953 PetscErrorCode ierr; 1954 1955 PetscFunctionBegin; 1956 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1957 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 /* -------------------------------------------------------------------*/ 1962 static struct _MatOps MatOps_Values = { 1963 MatSetValues_MPIBAIJ, 1964 MatGetRow_MPIBAIJ, 1965 MatRestoreRow_MPIBAIJ, 1966 MatMult_MPIBAIJ, 1967 /* 4*/ MatMultAdd_MPIBAIJ, 1968 MatMultTranspose_MPIBAIJ, 1969 MatMultTransposeAdd_MPIBAIJ, 1970 0, 1971 0, 1972 0, 1973 /*10*/ 0, 1974 0, 1975 0, 1976 0, 1977 MatTranspose_MPIBAIJ, 1978 /*15*/ MatGetInfo_MPIBAIJ, 1979 MatEqual_MPIBAIJ, 1980 MatGetDiagonal_MPIBAIJ, 1981 MatDiagonalScale_MPIBAIJ, 1982 MatNorm_MPIBAIJ, 1983 /*20*/ MatAssemblyBegin_MPIBAIJ, 1984 MatAssemblyEnd_MPIBAIJ, 1985 0, 1986 MatSetOption_MPIBAIJ, 1987 MatZeroEntries_MPIBAIJ, 1988 /*25*/ MatZeroRows_MPIBAIJ, 1989 0, 1990 0, 1991 0, 1992 0, 1993 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1994 0, 1995 0, 1996 0, 1997 0, 1998 /*35*/ MatDuplicate_MPIBAIJ, 1999 0, 2000 0, 2001 0, 2002 0, 2003 /*40*/ MatAXPY_MPIBAIJ, 2004 MatGetSubMatrices_MPIBAIJ, 2005 MatIncreaseOverlap_MPIBAIJ, 2006 MatGetValues_MPIBAIJ, 2007 MatCopy_MPIBAIJ, 2008 /*45*/ 0, 2009 MatScale_MPIBAIJ, 2010 0, 2011 0, 2012 0, 2013 /*50*/ 0, 2014 0, 2015 0, 2016 0, 2017 0, 2018 /*55*/ 0, 2019 0, 2020 MatSetUnfactored_MPIBAIJ, 2021 0, 2022 MatSetValuesBlocked_MPIBAIJ, 2023 /*60*/ 0, 2024 MatDestroy_MPIBAIJ, 2025 MatView_MPIBAIJ, 2026 0, 2027 0, 2028 /*65*/ 0, 2029 0, 2030 0, 2031 0, 2032 0, 2033 /*70*/ MatGetRowMaxAbs_MPIBAIJ, 2034 0, 2035 0, 2036 0, 2037 0, 2038 /*75*/ 0, 2039 0, 2040 0, 2041 0, 2042 0, 2043 /*80*/ 0, 2044 0, 2045 0, 2046 0, 2047 MatLoad_MPIBAIJ, 2048 /*85*/ 0, 2049 0, 2050 0, 2051 0, 2052 0, 2053 /*90*/ 0, 2054 0, 2055 0, 2056 0, 2057 0, 2058 /*95*/ 0, 2059 0, 2060 0, 2061 0, 2062 0, 2063 /*100*/0, 2064 0, 2065 0, 2066 0, 2067 0, 2068 /*105*/0, 2069 MatRealPart_MPIBAIJ, 2070 MatImaginaryPart_MPIBAIJ}; 2071 2072 2073 EXTERN_C_BEGIN 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2076 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2077 { 2078 PetscFunctionBegin; 2079 *a = ((Mat_MPIBAIJ *)A->data)->A; 2080 *iscopy = PETSC_FALSE; 2081 PetscFunctionReturn(0); 2082 } 2083 EXTERN_C_END 2084 2085 EXTERN_C_BEGIN 2086 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2087 EXTERN_C_END 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2091 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2092 { 2093 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2094 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2095 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2096 const PetscInt *JJ; 2097 PetscScalar *values; 2098 PetscErrorCode ierr; 2099 2100 PetscFunctionBegin; 2101 #if defined(PETSC_OPT_g) 2102 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 2103 #endif 2104 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2105 o_nnz = d_nnz + m; 2106 2107 for (i=0; i<m; i++) { 2108 nnz = Ii[i+1]- Ii[i]; 2109 JJ = J + Ii[i]; 2110 nnz_max = PetscMax(nnz_max,nnz); 2111 #if defined(PETSC_OPT_g) 2112 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2113 #endif 2114 for (j=0; j<nnz; j++) { 2115 if (*JJ >= cstart) break; 2116 JJ++; 2117 } 2118 d = 0; 2119 for (; j<nnz; j++) { 2120 if (*JJ++ >= cend) break; 2121 d++; 2122 } 2123 d_nnz[i] = d; 2124 o_nnz[i] = nnz - d; 2125 } 2126 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2127 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2128 2129 if (v) values = (PetscScalar*)v; 2130 else { 2131 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2132 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2133 } 2134 2135 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2136 for (i=0; i<m; i++) { 2137 ii = i + rstart; 2138 nnz = Ii[i+1]- Ii[i]; 2139 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2140 } 2141 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2144 2145 if (!v) { 2146 ierr = PetscFree(values);CHKERRQ(ierr); 2147 } 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2153 /*@C 2154 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2155 (the default parallel PETSc format). 2156 2157 Collective on MPI_Comm 2158 2159 Input Parameters: 2160 + A - the matrix 2161 . i - the indices into j for the start of each local row (starts with zero) 2162 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2163 - v - optional values in the matrix 2164 2165 Level: developer 2166 2167 .keywords: matrix, aij, compressed row, sparse, parallel 2168 2169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2170 @*/ 2171 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2172 { 2173 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2174 2175 PetscFunctionBegin; 2176 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2177 if (f) { 2178 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2179 } 2180 PetscFunctionReturn(0); 2181 } 2182 2183 EXTERN_C_BEGIN 2184 #undef __FUNCT__ 2185 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2186 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2187 { 2188 Mat_MPIBAIJ *b; 2189 PetscErrorCode ierr; 2190 PetscInt i; 2191 2192 PetscFunctionBegin; 2193 B->preallocated = PETSC_TRUE; 2194 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2195 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2196 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2197 2198 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2199 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2200 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2201 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2202 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2203 2204 B->rmap.bs = bs; 2205 B->cmap.bs = bs; 2206 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2207 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2208 2209 if (d_nnz) { 2210 for (i=0; i<B->rmap.n/bs; i++) { 2211 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2212 } 2213 } 2214 if (o_nnz) { 2215 for (i=0; i<B->rmap.n/bs; i++) { 2216 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2217 } 2218 } 2219 2220 b = (Mat_MPIBAIJ*)B->data; 2221 b->bs2 = bs*bs; 2222 b->mbs = B->rmap.n/bs; 2223 b->nbs = B->cmap.n/bs; 2224 b->Mbs = B->rmap.N/bs; 2225 b->Nbs = B->cmap.N/bs; 2226 2227 for (i=0; i<=b->size; i++) { 2228 b->rangebs[i] = B->rmap.range[i]/bs; 2229 } 2230 b->rstartbs = B->rmap.rstart/bs; 2231 b->rendbs = B->rmap.rend/bs; 2232 b->cstartbs = B->cmap.rstart/bs; 2233 b->cendbs = B->cmap.rend/bs; 2234 2235 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2236 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2237 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2238 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2239 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2240 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2241 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2242 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2243 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2244 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2245 2246 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2247 2248 PetscFunctionReturn(0); 2249 } 2250 EXTERN_C_END 2251 2252 EXTERN_C_BEGIN 2253 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2254 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2255 EXTERN_C_END 2256 2257 /*MC 2258 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2259 2260 Options Database Keys: 2261 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2262 . -mat_block_size <bs> - set the blocksize used to store the matrix 2263 - -mat_use_hash_table <fact> 2264 2265 Level: beginner 2266 2267 .seealso: MatCreateMPIBAIJ 2268 M*/ 2269 2270 EXTERN_C_BEGIN 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatCreate_MPIBAIJ" 2273 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2274 { 2275 Mat_MPIBAIJ *b; 2276 PetscErrorCode ierr; 2277 PetscTruth flg; 2278 2279 PetscFunctionBegin; 2280 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2281 B->data = (void*)b; 2282 2283 2284 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2285 B->mapping = 0; 2286 B->factor = 0; 2287 B->assembled = PETSC_FALSE; 2288 2289 B->insertmode = NOT_SET_VALUES; 2290 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2291 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2292 2293 /* build local table of row and column ownerships */ 2294 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2295 2296 /* build cache for off array entries formed */ 2297 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2298 b->donotstash = PETSC_FALSE; 2299 b->colmap = PETSC_NULL; 2300 b->garray = PETSC_NULL; 2301 b->roworiented = PETSC_TRUE; 2302 2303 #if defined(PETSC_USE_MAT_SINGLE) 2304 /* stuff for MatSetValues_XXX in single precision */ 2305 b->setvalueslen = 0; 2306 b->setvaluescopy = PETSC_NULL; 2307 #endif 2308 2309 /* stuff used in block assembly */ 2310 b->barray = 0; 2311 2312 /* stuff used for matrix vector multiply */ 2313 b->lvec = 0; 2314 b->Mvctx = 0; 2315 2316 /* stuff for MatGetRow() */ 2317 b->rowindices = 0; 2318 b->rowvalues = 0; 2319 b->getrowactive = PETSC_FALSE; 2320 2321 /* hash table stuff */ 2322 b->ht = 0; 2323 b->hd = 0; 2324 b->ht_size = 0; 2325 b->ht_flag = PETSC_FALSE; 2326 b->ht_fact = 0; 2327 b->ht_total_ct = 0; 2328 b->ht_insert_ct = 0; 2329 2330 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2331 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2332 if (flg) { 2333 PetscReal fact = 1.39; 2334 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2335 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2336 if (fact <= 1.0) fact = 1.39; 2337 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2338 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2339 } 2340 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2341 2342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2343 "MatStoreValues_MPIBAIJ", 2344 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2345 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2346 "MatRetrieveValues_MPIBAIJ", 2347 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2348 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2349 "MatGetDiagonalBlock_MPIBAIJ", 2350 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2352 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2353 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2354 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2355 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2356 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2358 "MatDiagonalScaleLocal_MPIBAIJ", 2359 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2361 "MatSetHashTableFactor_MPIBAIJ", 2362 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2363 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2364 PetscFunctionReturn(0); 2365 } 2366 EXTERN_C_END 2367 2368 /*MC 2369 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2370 2371 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2372 and MATMPIBAIJ otherwise. 2373 2374 Options Database Keys: 2375 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2376 2377 Level: beginner 2378 2379 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2380 M*/ 2381 2382 EXTERN_C_BEGIN 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatCreate_BAIJ" 2385 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2386 { 2387 PetscErrorCode ierr; 2388 PetscMPIInt size; 2389 2390 PetscFunctionBegin; 2391 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2392 if (size == 1) { 2393 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2394 } else { 2395 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2396 } 2397 PetscFunctionReturn(0); 2398 } 2399 EXTERN_C_END 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2403 /*@C 2404 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2405 (block compressed row). For good matrix assembly performance 2406 the user should preallocate the matrix storage by setting the parameters 2407 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2408 performance can be increased by more than a factor of 50. 2409 2410 Collective on Mat 2411 2412 Input Parameters: 2413 + A - the matrix 2414 . bs - size of blockk 2415 . d_nz - number of block nonzeros per block row in diagonal portion of local 2416 submatrix (same for all local rows) 2417 . d_nnz - array containing the number of block nonzeros in the various block rows 2418 of the in diagonal portion of the local (possibly different for each block 2419 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2420 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2421 submatrix (same for all local rows). 2422 - o_nnz - array containing the number of nonzeros in the various block rows of the 2423 off-diagonal portion of the local submatrix (possibly different for 2424 each block row) or PETSC_NULL. 2425 2426 If the *_nnz parameter is given then the *_nz parameter is ignored 2427 2428 Options Database Keys: 2429 + -mat_block_size - size of the blocks to use 2430 - -mat_use_hash_table <fact> 2431 2432 Notes: 2433 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2434 than it must be used on all processors that share the object for that argument. 2435 2436 Storage Information: 2437 For a square global matrix we define each processor's diagonal portion 2438 to be its local rows and the corresponding columns (a square submatrix); 2439 each processor's off-diagonal portion encompasses the remainder of the 2440 local matrix (a rectangular submatrix). 2441 2442 The user can specify preallocated storage for the diagonal part of 2443 the local submatrix with either d_nz or d_nnz (not both). Set 2444 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2445 memory allocation. Likewise, specify preallocated storage for the 2446 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2447 2448 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2449 the figure below we depict these three local rows and all columns (0-11). 2450 2451 .vb 2452 0 1 2 3 4 5 6 7 8 9 10 11 2453 ------------------- 2454 row 3 | o o o d d d o o o o o o 2455 row 4 | o o o d d d o o o o o o 2456 row 5 | o o o d d d o o o o o o 2457 ------------------- 2458 .ve 2459 2460 Thus, any entries in the d locations are stored in the d (diagonal) 2461 submatrix, and any entries in the o locations are stored in the 2462 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2463 stored simply in the MATSEQBAIJ format for compressed row storage. 2464 2465 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2466 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2467 In general, for PDE problems in which most nonzeros are near the diagonal, 2468 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2469 or you will get TERRIBLE performance; see the users' manual chapter on 2470 matrices. 2471 2472 Level: intermediate 2473 2474 .keywords: matrix, block, aij, compressed row, sparse, parallel 2475 2476 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2477 @*/ 2478 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2479 { 2480 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2481 2482 PetscFunctionBegin; 2483 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2484 if (f) { 2485 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2486 } 2487 PetscFunctionReturn(0); 2488 } 2489 2490 #undef __FUNCT__ 2491 #define __FUNCT__ "MatCreateMPIBAIJ" 2492 /*@C 2493 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2494 (block compressed row). For good matrix assembly performance 2495 the user should preallocate the matrix storage by setting the parameters 2496 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2497 performance can be increased by more than a factor of 50. 2498 2499 Collective on MPI_Comm 2500 2501 Input Parameters: 2502 + comm - MPI communicator 2503 . bs - size of blockk 2504 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2505 This value should be the same as the local size used in creating the 2506 y vector for the matrix-vector product y = Ax. 2507 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2508 This value should be the same as the local size used in creating the 2509 x vector for the matrix-vector product y = Ax. 2510 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2511 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2512 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2513 submatrix (same for all local rows) 2514 . d_nnz - array containing the number of nonzero blocks in the various block rows 2515 of the in diagonal portion of the local (possibly different for each block 2516 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2517 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2518 submatrix (same for all local rows). 2519 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2520 off-diagonal portion of the local submatrix (possibly different for 2521 each block row) or PETSC_NULL. 2522 2523 Output Parameter: 2524 . A - the matrix 2525 2526 Options Database Keys: 2527 + -mat_block_size - size of the blocks to use 2528 - -mat_use_hash_table <fact> 2529 2530 Notes: 2531 If the *_nnz parameter is given then the *_nz parameter is ignored 2532 2533 A nonzero block is any block that as 1 or more nonzeros in it 2534 2535 The user MUST specify either the local or global matrix dimensions 2536 (possibly both). 2537 2538 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2539 than it must be used on all processors that share the object for that argument. 2540 2541 Storage Information: 2542 For a square global matrix we define each processor's diagonal portion 2543 to be its local rows and the corresponding columns (a square submatrix); 2544 each processor's off-diagonal portion encompasses the remainder of the 2545 local matrix (a rectangular submatrix). 2546 2547 The user can specify preallocated storage for the diagonal part of 2548 the local submatrix with either d_nz or d_nnz (not both). Set 2549 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2550 memory allocation. Likewise, specify preallocated storage for the 2551 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2552 2553 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2554 the figure below we depict these three local rows and all columns (0-11). 2555 2556 .vb 2557 0 1 2 3 4 5 6 7 8 9 10 11 2558 ------------------- 2559 row 3 | o o o d d d o o o o o o 2560 row 4 | o o o d d d o o o o o o 2561 row 5 | o o o d d d o o o o o o 2562 ------------------- 2563 .ve 2564 2565 Thus, any entries in the d locations are stored in the d (diagonal) 2566 submatrix, and any entries in the o locations are stored in the 2567 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2568 stored simply in the MATSEQBAIJ format for compressed row storage. 2569 2570 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2571 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2572 In general, for PDE problems in which most nonzeros are near the diagonal, 2573 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2574 or you will get TERRIBLE performance; see the users' manual chapter on 2575 matrices. 2576 2577 Level: intermediate 2578 2579 .keywords: matrix, block, aij, compressed row, sparse, parallel 2580 2581 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2582 @*/ 2583 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2584 { 2585 PetscErrorCode ierr; 2586 PetscMPIInt size; 2587 2588 PetscFunctionBegin; 2589 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2590 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2591 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2592 if (size > 1) { 2593 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2594 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2595 } else { 2596 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2597 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2598 } 2599 PetscFunctionReturn(0); 2600 } 2601 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2604 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2605 { 2606 Mat mat; 2607 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2608 PetscErrorCode ierr; 2609 PetscInt len=0; 2610 2611 PetscFunctionBegin; 2612 *newmat = 0; 2613 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2614 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2615 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2616 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2617 2618 mat->factor = matin->factor; 2619 mat->preallocated = PETSC_TRUE; 2620 mat->assembled = PETSC_TRUE; 2621 mat->insertmode = NOT_SET_VALUES; 2622 2623 a = (Mat_MPIBAIJ*)mat->data; 2624 mat->rmap.bs = matin->rmap.bs; 2625 a->bs2 = oldmat->bs2; 2626 a->mbs = oldmat->mbs; 2627 a->nbs = oldmat->nbs; 2628 a->Mbs = oldmat->Mbs; 2629 a->Nbs = oldmat->Nbs; 2630 2631 ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2632 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2633 2634 a->size = oldmat->size; 2635 a->rank = oldmat->rank; 2636 a->donotstash = oldmat->donotstash; 2637 a->roworiented = oldmat->roworiented; 2638 a->rowindices = 0; 2639 a->rowvalues = 0; 2640 a->getrowactive = PETSC_FALSE; 2641 a->barray = 0; 2642 a->rstartbs = oldmat->rstartbs; 2643 a->rendbs = oldmat->rendbs; 2644 a->cstartbs = oldmat->cstartbs; 2645 a->cendbs = oldmat->cendbs; 2646 2647 /* hash table stuff */ 2648 a->ht = 0; 2649 a->hd = 0; 2650 a->ht_size = 0; 2651 a->ht_flag = oldmat->ht_flag; 2652 a->ht_fact = oldmat->ht_fact; 2653 a->ht_total_ct = 0; 2654 a->ht_insert_ct = 0; 2655 2656 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2657 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2658 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2659 if (oldmat->colmap) { 2660 #if defined (PETSC_USE_CTABLE) 2661 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2662 #else 2663 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2664 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2665 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2666 #endif 2667 } else a->colmap = 0; 2668 2669 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2670 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2671 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2672 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2673 } else a->garray = 0; 2674 2675 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2676 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2677 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2678 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2679 2680 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2681 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2682 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2683 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2684 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2685 *newmat = mat; 2686 2687 PetscFunctionReturn(0); 2688 } 2689 2690 #include "petscsys.h" 2691 2692 #undef __FUNCT__ 2693 #define __FUNCT__ "MatLoad_MPIBAIJ" 2694 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2695 { 2696 Mat A; 2697 PetscErrorCode ierr; 2698 int fd; 2699 PetscInt i,nz,j,rstart,rend; 2700 PetscScalar *vals,*buf; 2701 MPI_Comm comm = ((PetscObject)viewer)->comm; 2702 MPI_Status status; 2703 PetscMPIInt rank,size,maxnz; 2704 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2705 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2706 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2707 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2708 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2709 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2710 2711 PetscFunctionBegin; 2712 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 2713 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2714 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2715 2716 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2717 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2718 if (!rank) { 2719 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2720 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2721 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2722 } 2723 2724 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2725 M = header[1]; N = header[2]; 2726 2727 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2728 2729 /* 2730 This code adds extra rows to make sure the number of rows is 2731 divisible by the blocksize 2732 */ 2733 Mbs = M/bs; 2734 extra_rows = bs - M + bs*Mbs; 2735 if (extra_rows == bs) extra_rows = 0; 2736 else Mbs++; 2737 if (extra_rows && !rank) { 2738 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2739 } 2740 2741 /* determine ownership of all rows */ 2742 mbs = Mbs/size + ((Mbs % size) > rank); 2743 m = mbs*bs; 2744 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2745 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2746 2747 /* process 0 needs enough room for process with most rows */ 2748 if (!rank) { 2749 mmax = rowners[1]; 2750 for (i=2; i<size; i++) { 2751 mmax = PetscMax(mmax,rowners[i]); 2752 } 2753 mmax*=bs; 2754 } else mmax = m; 2755 2756 rowners[0] = 0; 2757 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2758 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2759 rstart = rowners[rank]; 2760 rend = rowners[rank+1]; 2761 2762 /* distribute row lengths to all processors */ 2763 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2764 if (!rank) { 2765 mend = m; 2766 if (size == 1) mend = mend - extra_rows; 2767 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2768 for (j=mend; j<m; j++) locrowlens[j] = 1; 2769 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2770 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2771 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2772 for (j=0; j<m; j++) { 2773 procsnz[0] += locrowlens[j]; 2774 } 2775 for (i=1; i<size; i++) { 2776 mend = browners[i+1] - browners[i]; 2777 if (i == size-1) mend = mend - extra_rows; 2778 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2779 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2780 /* calculate the number of nonzeros on each processor */ 2781 for (j=0; j<browners[i+1]-browners[i]; j++) { 2782 procsnz[i] += rowlengths[j]; 2783 } 2784 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2785 } 2786 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2787 } else { 2788 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2789 } 2790 2791 if (!rank) { 2792 /* determine max buffer needed and allocate it */ 2793 maxnz = procsnz[0]; 2794 for (i=1; i<size; i++) { 2795 maxnz = PetscMax(maxnz,procsnz[i]); 2796 } 2797 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2798 2799 /* read in my part of the matrix column indices */ 2800 nz = procsnz[0]; 2801 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2802 mycols = ibuf; 2803 if (size == 1) nz -= extra_rows; 2804 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2805 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2806 2807 /* read in every ones (except the last) and ship off */ 2808 for (i=1; i<size-1; i++) { 2809 nz = procsnz[i]; 2810 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2811 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2812 } 2813 /* read in the stuff for the last proc */ 2814 if (size != 1) { 2815 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2816 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2817 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2818 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2819 } 2820 ierr = PetscFree(cols);CHKERRQ(ierr); 2821 } else { 2822 /* determine buffer space needed for message */ 2823 nz = 0; 2824 for (i=0; i<m; i++) { 2825 nz += locrowlens[i]; 2826 } 2827 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2828 mycols = ibuf; 2829 /* receive message of column indices*/ 2830 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2831 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2832 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2833 } 2834 2835 /* loop over local rows, determining number of off diagonal entries */ 2836 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2837 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2838 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2839 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2840 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2841 rowcount = 0; nzcount = 0; 2842 for (i=0; i<mbs; i++) { 2843 dcount = 0; 2844 odcount = 0; 2845 for (j=0; j<bs; j++) { 2846 kmax = locrowlens[rowcount]; 2847 for (k=0; k<kmax; k++) { 2848 tmp = mycols[nzcount++]/bs; 2849 if (!mask[tmp]) { 2850 mask[tmp] = 1; 2851 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2852 else masked1[dcount++] = tmp; 2853 } 2854 } 2855 rowcount++; 2856 } 2857 2858 dlens[i] = dcount; 2859 odlens[i] = odcount; 2860 2861 /* zero out the mask elements we set */ 2862 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2863 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2864 } 2865 2866 /* create our matrix */ 2867 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2868 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2869 ierr = MatSetType(A,type);CHKERRQ(ierr) 2870 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2871 2872 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2873 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2874 2875 if (!rank) { 2876 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2877 /* read in my part of the matrix numerical values */ 2878 nz = procsnz[0]; 2879 vals = buf; 2880 mycols = ibuf; 2881 if (size == 1) nz -= extra_rows; 2882 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2883 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2884 2885 /* insert into matrix */ 2886 jj = rstart*bs; 2887 for (i=0; i<m; i++) { 2888 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2889 mycols += locrowlens[i]; 2890 vals += locrowlens[i]; 2891 jj++; 2892 } 2893 /* read in other processors (except the last one) and ship out */ 2894 for (i=1; i<size-1; i++) { 2895 nz = procsnz[i]; 2896 vals = buf; 2897 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2898 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2899 } 2900 /* the last proc */ 2901 if (size != 1){ 2902 nz = procsnz[i] - extra_rows; 2903 vals = buf; 2904 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2905 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2906 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2907 } 2908 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2909 } else { 2910 /* receive numeric values */ 2911 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2912 2913 /* receive message of values*/ 2914 vals = buf; 2915 mycols = ibuf; 2916 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2917 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2918 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2919 2920 /* insert into matrix */ 2921 jj = rstart*bs; 2922 for (i=0; i<m; i++) { 2923 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2924 mycols += locrowlens[i]; 2925 vals += locrowlens[i]; 2926 jj++; 2927 } 2928 } 2929 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2930 ierr = PetscFree(buf);CHKERRQ(ierr); 2931 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2932 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2933 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2934 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2935 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2936 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2937 2938 *newmat = A; 2939 PetscFunctionReturn(0); 2940 } 2941 2942 #undef __FUNCT__ 2943 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2944 /*@ 2945 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2946 2947 Input Parameters: 2948 . mat - the matrix 2949 . fact - factor 2950 2951 Collective on Mat 2952 2953 Level: advanced 2954 2955 Notes: 2956 This can also be set by the command line option: -mat_use_hash_table <fact> 2957 2958 .keywords: matrix, hashtable, factor, HT 2959 2960 .seealso: MatSetOption() 2961 @*/ 2962 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2963 { 2964 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2965 2966 PetscFunctionBegin; 2967 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2968 if (f) { 2969 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2970 } 2971 PetscFunctionReturn(0); 2972 } 2973 2974 EXTERN_C_BEGIN 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2977 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2978 { 2979 Mat_MPIBAIJ *baij; 2980 2981 PetscFunctionBegin; 2982 baij = (Mat_MPIBAIJ*)mat->data; 2983 baij->ht_fact = fact; 2984 PetscFunctionReturn(0); 2985 } 2986 EXTERN_C_END 2987 2988 #undef __FUNCT__ 2989 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2990 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2991 { 2992 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2993 PetscFunctionBegin; 2994 *Ad = a->A; 2995 *Ao = a->B; 2996 *colmap = a->garray; 2997 PetscFunctionReturn(0); 2998 } 2999