1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 19 { 20 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt i,*idxb = 0; 23 PetscScalar *va,*vb; 24 Vec vtmp; 25 26 PetscFunctionBegin; 27 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 28 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 29 if (idx) { 30 for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;} 31 } 32 33 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 34 if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 35 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 36 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 37 38 for (i=0; i<A->rmap.n; i++){ 39 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);} 40 } 41 42 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 43 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 44 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 45 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 46 PetscFunctionReturn(0); 47 } 48 49 EXTERN_C_BEGIN 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 53 { 54 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 59 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 60 PetscFunctionReturn(0); 61 } 62 EXTERN_C_END 63 64 EXTERN_C_BEGIN 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 68 { 69 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 74 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 EXTERN_C_END 78 79 /* 80 Local utility routine that creates a mapping from the global column 81 number to the local number in the off-diagonal part of the local 82 storage of the matrix. This is done in a non scable way since the 83 length of colmap equals the global matrix length. 84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 93 94 PetscFunctionBegin; 95 #if defined (PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++){ 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define CHUNKSIZE 10 110 111 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 112 { \ 113 \ 114 brow = row/bs; \ 115 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 116 rmax = aimax[brow]; nrow = ailen[brow]; \ 117 bcol = col/bs; \ 118 ridx = row % bs; cidx = col % bs; \ 119 low = 0; high = nrow; \ 120 while (high-low > 3) { \ 121 t = (low+high)/2; \ 122 if (rp[t] > bcol) high = t; \ 123 else low = t; \ 124 } \ 125 for (_i=low; _i<high; _i++) { \ 126 if (rp[_i] > bcol) break; \ 127 if (rp[_i] == bcol) { \ 128 bap = ap + bs2*_i + bs*cidx + ridx; \ 129 if (addv == ADD_VALUES) *bap += value; \ 130 else *bap = value; \ 131 goto a_noinsert; \ 132 } \ 133 } \ 134 if (a->nonew == 1) goto a_noinsert; \ 135 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 136 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 137 N = nrow++ - 1; \ 138 /* shift up all the later entries in this row */ \ 139 for (ii=N; ii>=_i; ii--) { \ 140 rp[ii+1] = rp[ii]; \ 141 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 142 } \ 143 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 144 rp[_i] = bcol; \ 145 ap[bs2*_i + bs*cidx + ridx] = value; \ 146 a_noinsert:; \ 147 ailen[brow] = nrow; \ 148 } 149 150 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 151 { \ 152 brow = row/bs; \ 153 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 154 rmax = bimax[brow]; nrow = bilen[brow]; \ 155 bcol = col/bs; \ 156 ridx = row % bs; cidx = col % bs; \ 157 low = 0; high = nrow; \ 158 while (high-low > 3) { \ 159 t = (low+high)/2; \ 160 if (rp[t] > bcol) high = t; \ 161 else low = t; \ 162 } \ 163 for (_i=low; _i<high; _i++) { \ 164 if (rp[_i] > bcol) break; \ 165 if (rp[_i] == bcol) { \ 166 bap = ap + bs2*_i + bs*cidx + ridx; \ 167 if (addv == ADD_VALUES) *bap += value; \ 168 else *bap = value; \ 169 goto b_noinsert; \ 170 } \ 171 } \ 172 if (b->nonew == 1) goto b_noinsert; \ 173 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 174 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 175 CHKMEMQ;\ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for (ii=N; ii>=_i; ii--) { \ 179 rp[ii+1] = rp[ii]; \ 180 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 181 } \ 182 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatSetValues_MPIBAIJ" 191 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 194 MatScalar value; 195 PetscTruth roworiented = baij->roworiented; 196 PetscErrorCode ierr; 197 PetscInt i,j,row,col; 198 PetscInt rstart_orig=mat->rmap.rstart; 199 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 200 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 201 202 /* Some Variables required in the macro */ 203 Mat A = baij->A; 204 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 205 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 206 MatScalar *aa=a->a; 207 208 Mat B = baij->B; 209 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 210 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 211 MatScalar *ba=b->a; 212 213 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 214 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 215 MatScalar *ap,*bap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_DEBUG) 221 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 222 #endif 223 if (im[i] >= rstart_orig && im[i] < rend_orig) { 224 row = im[i] - rstart_orig; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart_orig && in[j] < cend_orig){ 227 col = in[j] - cstart_orig; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } else if (in[j] < 0) continue; 232 #if defined(PETSC_USE_DEBUG) 233 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);} 234 #endif 235 else { 236 if (mat->was_assembled) { 237 if (!baij->colmap) { 238 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 239 } 240 #if defined (PETSC_USE_CTABLE) 241 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 242 col = col - 1; 243 #else 244 col = baij->colmap[in[j]/bs] - 1; 245 #endif 246 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 247 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 248 col = in[j]; 249 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 250 B = baij->B; 251 b = (Mat_SeqBAIJ*)(B)->data; 252 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 253 ba=b->a; 254 } else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (!baij->donotstash) { 263 if (roworiented) { 264 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 265 } else { 266 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 267 } 268 } 269 } 270 } 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 277 { 278 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 279 const PetscScalar *value; 280 MatScalar *barray=baij->barray; 281 PetscTruth roworiented = baij->roworiented; 282 PetscErrorCode ierr; 283 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 284 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 285 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 286 287 PetscFunctionBegin; 288 if(!barray) { 289 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 290 baij->barray = barray; 291 } 292 293 if (roworiented) { 294 stepval = (n-1)*bs; 295 } else { 296 stepval = (m-1)*bs; 297 } 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + i*(stepval+bs)*bs + j*bs; 314 } else { 315 value = v + j*(stepval+bs)*bs + i*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=stepval) { 318 for (jj=0; jj<bs; jj++) { 319 *barray++ = *value++; 320 } 321 } 322 barray -=bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend){ 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } 329 else if (in[j] < 0) continue; 330 #if defined(PETSC_USE_DEBUG) 331 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 332 #endif 333 else { 334 if (mat->was_assembled) { 335 if (!baij->colmap) { 336 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 337 } 338 339 #if defined(PETSC_USE_DEBUG) 340 #if defined (PETSC_USE_CTABLE) 341 { PetscInt data; 342 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 343 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 344 } 345 #else 346 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 347 #endif 348 #endif 349 #if defined (PETSC_USE_CTABLE) 350 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 351 col = (col - 1)/bs; 352 #else 353 col = (baij->colmap[in[j]] - 1)/bs; 354 #endif 355 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 356 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 357 col = in[j]; 358 } 359 } 360 else col = in[j]; 361 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 362 } 363 } 364 } else { 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 384 { 385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 386 PetscTruth roworiented = baij->roworiented; 387 PetscErrorCode ierr; 388 PetscInt i,j,row,col; 389 PetscInt rstart_orig=mat->rmap.rstart; 390 PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 391 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 392 PetscReal tmp; 393 MatScalar **HD = baij->hd,value; 394 #if defined(PETSC_USE_DEBUG) 395 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 396 #endif 397 398 PetscFunctionBegin; 399 400 for (i=0; i<m; i++) { 401 #if defined(PETSC_USE_DEBUG) 402 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 403 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscTruth roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 584 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 662 tmp2 = tmp + mat->cmap.N; 663 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 664 v = amat->a; jj = amat->j; 665 for (i=0; i<amat->nz; i++) { 666 for (j=0; j<bs; j++){ 667 col = bs*(cstart + *jj) + j; /* column index */ 668 for (row=0; row<bs; row++){ 669 tmp[col] += PetscAbsScalar(*v); v++; 670 } 671 } 672 jj++; 673 } 674 v = bmat->a; jj = bmat->j; 675 for (i=0; i<bmat->nz; i++) { 676 for (j=0; j<bs; j++){ 677 col = bs*garray[*jj] + j; 678 for (row=0; row<bs; row++){ 679 tmp[col] += PetscAbsScalar(*v); v++; 680 } 681 } 682 jj++; 683 } 684 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 685 *nrm = 0.0; 686 for (j=0; j<mat->cmap.N; j++) { 687 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 688 } 689 ierr = PetscFree(tmp);CHKERRQ(ierr); 690 } else if (type == NORM_INFINITY) { /* max row sum */ 691 PetscReal *sums; 692 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 693 sum = 0.0; 694 for (j=0; j<amat->mbs; j++) { 695 for (row=0; row<bs; row++) sums[row] = 0.0; 696 v = amat->a + bs2*amat->i[j]; 697 nz = amat->i[j+1]-amat->i[j]; 698 for (i=0; i<nz; i++) { 699 for (col=0; col<bs; col++){ 700 for (row=0; row<bs; row++){ 701 sums[row] += PetscAbsScalar(*v); v++; 702 } 703 } 704 } 705 v = bmat->a + bs2*bmat->i[j]; 706 nz = bmat->i[j+1]-bmat->i[j]; 707 for (i=0; i<nz; i++) { 708 for (col=0; col<bs; col++){ 709 for (row=0; row<bs; row++){ 710 sums[row] += PetscAbsScalar(*v); v++; 711 } 712 } 713 } 714 for (row=0; row<bs; row++){ 715 if (sums[row] > sum) sum = sums[row]; 716 } 717 } 718 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 719 ierr = PetscFree(sums);CHKERRQ(ierr); 720 } else { 721 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 722 } 723 } 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Creates the hash table, and sets the table 729 This table is created only once. 730 If new entried need to be added to the matrix 731 then the hash table has to be destroyed and 732 recreated. 733 */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 737 { 738 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 739 Mat A = baij->A,B=baij->B; 740 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 741 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 742 PetscErrorCode ierr; 743 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 744 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 745 PetscInt *HT,key; 746 MatScalar **HD; 747 PetscReal tmp; 748 #if defined(PETSC_USE_INFO) 749 PetscInt ct=0,max=0; 750 #endif 751 752 PetscFunctionBegin; 753 baij->ht_size=(PetscInt)(factor*nz); 754 size = baij->ht_size; 755 756 if (baij->ht) { 757 PetscFunctionReturn(0); 758 } 759 760 /* Allocate Memory for Hash Table */ 761 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 762 baij->ht = (PetscInt*)(baij->hd + size); 763 HD = baij->hd; 764 HT = baij->ht; 765 766 767 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 768 769 770 /* Loop Over A */ 771 for (i=0; i<a->mbs; i++) { 772 for (j=ai[i]; j<ai[i+1]; j++) { 773 row = i+rstart; 774 col = aj[j]+cstart; 775 776 key = row*Nbs + col + 1; 777 h1 = HASH(size,key,tmp); 778 for (k=0; k<size; k++){ 779 if (!HT[(h1+k)%size]) { 780 HT[(h1+k)%size] = key; 781 HD[(h1+k)%size] = a->a + j*bs2; 782 break; 783 #if defined(PETSC_USE_INFO) 784 } else { 785 ct++; 786 #endif 787 } 788 } 789 #if defined(PETSC_USE_INFO) 790 if (k> max) max = k; 791 #endif 792 } 793 } 794 /* Loop Over B */ 795 for (i=0; i<b->mbs; i++) { 796 for (j=bi[i]; j<bi[i+1]; j++) { 797 row = i+rstart; 798 col = garray[bj[j]]; 799 key = row*Nbs + col + 1; 800 h1 = HASH(size,key,tmp); 801 for (k=0; k<size; k++){ 802 if (!HT[(h1+k)%size]) { 803 HT[(h1+k)%size] = key; 804 HD[(h1+k)%size] = b->a + j*bs2; 805 break; 806 #if defined(PETSC_USE_INFO) 807 } else { 808 ct++; 809 #endif 810 } 811 } 812 #if defined(PETSC_USE_INFO) 813 if (k> max) max = k; 814 #endif 815 } 816 } 817 818 /* Print Summary */ 819 #if defined(PETSC_USE_INFO) 820 for (i=0,j=0; i<size; i++) { 821 if (HT[i]) {j++;} 822 } 823 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 824 #endif 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNCT__ 829 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 830 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 831 { 832 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 833 PetscErrorCode ierr; 834 PetscInt nstash,reallocs; 835 InsertMode addv; 836 837 PetscFunctionBegin; 838 if (baij->donotstash) { 839 PetscFunctionReturn(0); 840 } 841 842 /* make sure all processors are either in INSERTMODE or ADDMODE */ 843 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 844 if (addv == (ADD_VALUES|INSERT_VALUES)) { 845 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 846 } 847 mat->insertmode = addv; /* in case this processor had no cache */ 848 849 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 850 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 851 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 852 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 853 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 854 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 860 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 861 { 862 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 863 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 864 PetscErrorCode ierr; 865 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 866 PetscInt *row,*col; 867 PetscTruth r1,r2,r3,other_disassembled; 868 MatScalar *val; 869 InsertMode addv = mat->insertmode; 870 PetscMPIInt n; 871 872 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 873 PetscFunctionBegin; 874 if (!baij->donotstash) { 875 while (1) { 876 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 877 if (!flg) break; 878 879 for (i=0; i<n;) { 880 /* Now identify the consecutive vals belonging to the same row */ 881 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 882 if (j < n) ncols = j-i; 883 else ncols = n-i; 884 /* Now assemble all these values with a single function call */ 885 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 886 i = j; 887 } 888 } 889 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 890 /* Now process the block-stash. Since the values are stashed column-oriented, 891 set the roworiented flag to column oriented, and after MatSetValues() 892 restore the original flags */ 893 r1 = baij->roworiented; 894 r2 = a->roworiented; 895 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 896 baij->roworiented = PETSC_FALSE; 897 a->roworiented = PETSC_FALSE; 898 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 899 while (1) { 900 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 901 if (!flg) break; 902 903 for (i=0; i<n;) { 904 /* Now identify the consecutive vals belonging to the same row */ 905 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 906 if (j < n) ncols = j-i; 907 else ncols = n-i; 908 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 909 i = j; 910 } 911 } 912 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 913 baij->roworiented = r1; 914 a->roworiented = r2; 915 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 916 } 917 918 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 919 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 920 921 /* determine if any processor has disassembled, if so we must 922 also disassemble ourselfs, in order that we may reassemble. */ 923 /* 924 if nonzero structure of submatrix B cannot change then we know that 925 no processor disassembled thus we can skip this stuff 926 */ 927 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 928 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 929 if (mat->was_assembled && !other_disassembled) { 930 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 931 } 932 } 933 934 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 935 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 936 } 937 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 938 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 939 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 940 941 #if defined(PETSC_USE_INFO) 942 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 943 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 944 baij->ht_total_ct = 0; 945 baij->ht_insert_ct = 0; 946 } 947 #endif 948 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 949 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 950 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 951 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 952 } 953 954 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 955 baij->rowvalues = 0; 956 PetscFunctionReturn(0); 957 } 958 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 961 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 962 { 963 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 964 PetscErrorCode ierr; 965 PetscMPIInt size = baij->size,rank = baij->rank; 966 PetscInt bs = mat->rmap.bs; 967 PetscTruth iascii,isdraw; 968 PetscViewer sviewer; 969 PetscViewerFormat format; 970 971 PetscFunctionBegin; 972 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 973 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 974 if (iascii) { 975 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 976 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 977 MatInfo info; 978 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 979 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 980 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 981 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 982 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 983 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 984 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 985 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 986 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 987 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 988 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 989 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } else if (format == PETSC_VIEWER_ASCII_INFO) { 992 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 995 PetscFunctionReturn(0); 996 } 997 } 998 999 if (isdraw) { 1000 PetscDraw draw; 1001 PetscTruth isnull; 1002 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1003 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1004 } 1005 1006 if (size == 1) { 1007 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1008 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1009 } else { 1010 /* assemble the entire matrix onto first processor. */ 1011 Mat A; 1012 Mat_SeqBAIJ *Aloc; 1013 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1014 MatScalar *a; 1015 1016 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1017 /* Perhaps this should be the type of mat? */ 1018 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1019 if (!rank) { 1020 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1021 } else { 1022 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1023 } 1024 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1025 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1026 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1027 1028 /* copy over the A part */ 1029 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1030 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1031 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1032 1033 for (i=0; i<mbs; i++) { 1034 rvals[0] = bs*(baij->rstartbs + i); 1035 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1036 for (j=ai[i]; j<ai[i+1]; j++) { 1037 col = (baij->cstartbs+aj[j])*bs; 1038 for (k=0; k<bs; k++) { 1039 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1040 col++; a += bs; 1041 } 1042 } 1043 } 1044 /* copy over the B part */ 1045 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1046 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1047 for (i=0; i<mbs; i++) { 1048 rvals[0] = bs*(baij->rstartbs + i); 1049 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1050 for (j=ai[i]; j<ai[i+1]; j++) { 1051 col = baij->garray[aj[j]]*bs; 1052 for (k=0; k<bs; k++) { 1053 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1054 col++; a += bs; 1055 } 1056 } 1057 } 1058 ierr = PetscFree(rvals);CHKERRQ(ierr); 1059 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1060 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1061 /* 1062 Everyone has to call to draw the matrix since the graphics waits are 1063 synchronized across all processors that share the PetscDraw object 1064 */ 1065 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1066 if (!rank) { 1067 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1068 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1069 } 1070 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1071 ierr = MatDestroy(A);CHKERRQ(ierr); 1072 } 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatView_MPIBAIJ" 1078 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1079 { 1080 PetscErrorCode ierr; 1081 PetscTruth iascii,isdraw,issocket,isbinary; 1082 1083 PetscFunctionBegin; 1084 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1085 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1086 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1087 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1088 if (iascii || isdraw || issocket || isbinary) { 1089 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1090 } else { 1091 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1098 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1099 { 1100 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 #if defined(PETSC_USE_LOG) 1105 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 1106 #endif 1107 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1108 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1109 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1110 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1111 #if defined (PETSC_USE_CTABLE) 1112 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1113 #else 1114 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1115 #endif 1116 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1117 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1118 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1119 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1120 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1121 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1122 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1123 ierr = PetscFree(baij);CHKERRQ(ierr); 1124 1125 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatMult_MPIBAIJ" 1138 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1139 { 1140 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1141 PetscErrorCode ierr; 1142 PetscInt nt; 1143 1144 PetscFunctionBegin; 1145 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1146 if (nt != A->cmap.n) { 1147 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1148 } 1149 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1150 if (nt != A->rmap.n) { 1151 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1152 } 1153 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1154 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1155 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1156 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1162 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1163 { 1164 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1165 PetscErrorCode ierr; 1166 1167 PetscFunctionBegin; 1168 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1170 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1171 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1177 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1178 { 1179 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1180 PetscErrorCode ierr; 1181 PetscTruth merged; 1182 1183 PetscFunctionBegin; 1184 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1185 /* do nondiagonal part */ 1186 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1187 if (!merged) { 1188 /* send it on its way */ 1189 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1190 /* do local part */ 1191 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1192 /* receive remote parts: note this assumes the values are not actually */ 1193 /* inserted in yy until the next line */ 1194 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1195 } else { 1196 /* do local part */ 1197 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1198 /* send it on its way */ 1199 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1200 /* values actually were received in the Begin() but we need to call this nop */ 1201 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1202 } 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1208 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1209 { 1210 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1211 PetscErrorCode ierr; 1212 1213 PetscFunctionBegin; 1214 /* do nondiagonal part */ 1215 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1216 /* send it on its way */ 1217 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1218 /* do local part */ 1219 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1220 /* receive remote parts: note this assumes the values are not actually */ 1221 /* inserted in yy until the next line, which is true for my implementation*/ 1222 /* but is not perhaps always true. */ 1223 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 /* 1228 This only works correctly for square matrices where the subblock A->A is the 1229 diagonal block 1230 */ 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1233 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1234 { 1235 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1240 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatScale_MPIBAIJ" 1246 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1247 { 1248 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1253 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1259 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1260 { 1261 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1262 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1263 PetscErrorCode ierr; 1264 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1265 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1266 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1267 1268 PetscFunctionBegin; 1269 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1270 mat->getrowactive = PETSC_TRUE; 1271 1272 if (!mat->rowvalues && (idx || v)) { 1273 /* 1274 allocate enough space to hold information from the longest row. 1275 */ 1276 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1277 PetscInt max = 1,mbs = mat->mbs,tmp; 1278 for (i=0; i<mbs; i++) { 1279 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1280 if (max < tmp) { max = tmp; } 1281 } 1282 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1283 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1284 } 1285 1286 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1287 lrow = row - brstart; 1288 1289 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1290 if (!v) {pvA = 0; pvB = 0;} 1291 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1292 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1293 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1294 nztot = nzA + nzB; 1295 1296 cmap = mat->garray; 1297 if (v || idx) { 1298 if (nztot) { 1299 /* Sort by increasing column numbers, assuming A and B already sorted */ 1300 PetscInt imark = -1; 1301 if (v) { 1302 *v = v_p = mat->rowvalues; 1303 for (i=0; i<nzB; i++) { 1304 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1305 else break; 1306 } 1307 imark = i; 1308 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1309 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1310 } 1311 if (idx) { 1312 *idx = idx_p = mat->rowindices; 1313 if (imark > -1) { 1314 for (i=0; i<imark; i++) { 1315 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1316 } 1317 } else { 1318 for (i=0; i<nzB; i++) { 1319 if (cmap[cworkB[i]/bs] < cstart) 1320 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1321 else break; 1322 } 1323 imark = i; 1324 } 1325 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1326 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1327 } 1328 } else { 1329 if (idx) *idx = 0; 1330 if (v) *v = 0; 1331 } 1332 } 1333 *nz = nztot; 1334 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1341 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1344 1345 PetscFunctionBegin; 1346 if (!baij->getrowactive) { 1347 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1348 } 1349 baij->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1355 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1356 { 1357 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1362 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1368 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1369 { 1370 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1371 Mat A = a->A,B = a->B; 1372 PetscErrorCode ierr; 1373 PetscReal isend[5],irecv[5]; 1374 1375 PetscFunctionBegin; 1376 info->block_size = (PetscReal)matin->rmap.bs; 1377 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1378 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1379 isend[3] = info->memory; isend[4] = info->mallocs; 1380 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1381 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1382 isend[3] += info->memory; isend[4] += info->mallocs; 1383 if (flag == MAT_LOCAL) { 1384 info->nz_used = isend[0]; 1385 info->nz_allocated = isend[1]; 1386 info->nz_unneeded = isend[2]; 1387 info->memory = isend[3]; 1388 info->mallocs = isend[4]; 1389 } else if (flag == MAT_GLOBAL_MAX) { 1390 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1391 info->nz_used = irecv[0]; 1392 info->nz_allocated = irecv[1]; 1393 info->nz_unneeded = irecv[2]; 1394 info->memory = irecv[3]; 1395 info->mallocs = irecv[4]; 1396 } else if (flag == MAT_GLOBAL_SUM) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } else { 1404 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1405 } 1406 info->rows_global = (PetscReal)A->rmap.N; 1407 info->columns_global = (PetscReal)A->cmap.N; 1408 info->rows_local = (PetscReal)A->rmap.N; 1409 info->columns_local = (PetscReal)A->cmap.N; 1410 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1411 info->fill_ratio_needed = 0; 1412 info->factor_mallocs = 0; 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1418 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1419 { 1420 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1421 PetscErrorCode ierr; 1422 1423 PetscFunctionBegin; 1424 switch (op) { 1425 case MAT_NEW_NONZERO_LOCATIONS: 1426 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1427 case MAT_KEEP_ZEROED_ROWS: 1428 case MAT_NEW_NONZERO_LOCATION_ERR: 1429 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1430 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1431 break; 1432 case MAT_ROW_ORIENTED: 1433 a->roworiented = flg; 1434 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1435 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1436 break; 1437 case MAT_NEW_DIAGONALS: 1438 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1439 break; 1440 case MAT_IGNORE_OFF_PROC_ENTRIES: 1441 a->donotstash = flg; 1442 break; 1443 case MAT_USE_HASH_TABLE: 1444 a->ht_flag = flg; 1445 break; 1446 case MAT_SYMMETRIC: 1447 case MAT_STRUCTURALLY_SYMMETRIC: 1448 case MAT_HERMITIAN: 1449 case MAT_SYMMETRY_ETERNAL: 1450 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1451 break; 1452 default: 1453 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1460 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1461 { 1462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1463 Mat_SeqBAIJ *Aloc; 1464 Mat B; 1465 PetscErrorCode ierr; 1466 PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1467 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1468 MatScalar *a; 1469 1470 PetscFunctionBegin; 1471 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1472 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1473 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1474 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1475 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1476 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1477 } else { 1478 B = *matout; 1479 } 1480 1481 /* copy over the A part */ 1482 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1483 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1484 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1485 1486 for (i=0; i<mbs; i++) { 1487 rvals[0] = bs*(baij->rstartbs + i); 1488 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1489 for (j=ai[i]; j<ai[i+1]; j++) { 1490 col = (baij->cstartbs+aj[j])*bs; 1491 for (k=0; k<bs; k++) { 1492 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1493 col++; a += bs; 1494 } 1495 } 1496 } 1497 /* copy over the B part */ 1498 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1499 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1500 for (i=0; i<mbs; i++) { 1501 rvals[0] = bs*(baij->rstartbs + i); 1502 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1503 for (j=ai[i]; j<ai[i+1]; j++) { 1504 col = baij->garray[aj[j]]*bs; 1505 for (k=0; k<bs; k++) { 1506 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1507 col++; a += bs; 1508 } 1509 } 1510 } 1511 ierr = PetscFree(rvals);CHKERRQ(ierr); 1512 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1513 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1514 1515 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1516 *matout = B; 1517 } else { 1518 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1519 } 1520 PetscFunctionReturn(0); 1521 } 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1525 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1526 { 1527 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1528 Mat a = baij->A,b = baij->B; 1529 PetscErrorCode ierr; 1530 PetscInt s1,s2,s3; 1531 1532 PetscFunctionBegin; 1533 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1534 if (rr) { 1535 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1536 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1537 /* Overlap communication with computation. */ 1538 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 } 1540 if (ll) { 1541 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1542 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1543 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1544 } 1545 /* scale the diagonal block */ 1546 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1547 1548 if (rr) { 1549 /* Do a scatter end and then right scale the off-diagonal block */ 1550 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1551 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1552 } 1553 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1559 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1560 { 1561 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1562 PetscErrorCode ierr; 1563 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1564 PetscInt i,*owners = A->rmap.range; 1565 PetscInt *nprocs,j,idx,nsends,row; 1566 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1567 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1568 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1569 MPI_Comm comm = ((PetscObject)A)->comm; 1570 MPI_Request *send_waits,*recv_waits; 1571 MPI_Status recv_status,*send_status; 1572 #if defined(PETSC_DEBUG) 1573 PetscTruth found = PETSC_FALSE; 1574 #endif 1575 1576 PetscFunctionBegin; 1577 /* first count number of contributors to each processor */ 1578 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1579 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1580 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1581 j = 0; 1582 for (i=0; i<N; i++) { 1583 if (lastidx > (idx = rows[i])) j = 0; 1584 lastidx = idx; 1585 for (; j<size; j++) { 1586 if (idx >= owners[j] && idx < owners[j+1]) { 1587 nprocs[2*j]++; 1588 nprocs[2*j+1] = 1; 1589 owner[i] = j; 1590 #if defined(PETSC_DEBUG) 1591 found = PETSC_TRUE; 1592 #endif 1593 break; 1594 } 1595 } 1596 #if defined(PETSC_DEBUG) 1597 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1598 found = PETSC_FALSE; 1599 #endif 1600 } 1601 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1602 1603 /* inform other processors of number of messages and max length*/ 1604 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1605 1606 /* post receives: */ 1607 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1608 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1609 for (i=0; i<nrecvs; i++) { 1610 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1611 } 1612 1613 /* do sends: 1614 1) starts[i] gives the starting index in svalues for stuff going to 1615 the ith processor 1616 */ 1617 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1618 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1619 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1620 starts[0] = 0; 1621 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1622 for (i=0; i<N; i++) { 1623 svalues[starts[owner[i]]++] = rows[i]; 1624 } 1625 1626 starts[0] = 0; 1627 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1628 count = 0; 1629 for (i=0; i<size; i++) { 1630 if (nprocs[2*i+1]) { 1631 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1632 } 1633 } 1634 ierr = PetscFree(starts);CHKERRQ(ierr); 1635 1636 base = owners[rank]; 1637 1638 /* wait on receives */ 1639 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1640 source = lens + nrecvs; 1641 count = nrecvs; slen = 0; 1642 while (count) { 1643 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1644 /* unpack receives into our local space */ 1645 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1646 source[imdex] = recv_status.MPI_SOURCE; 1647 lens[imdex] = n; 1648 slen += n; 1649 count--; 1650 } 1651 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1652 1653 /* move the data into the send scatter */ 1654 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1655 count = 0; 1656 for (i=0; i<nrecvs; i++) { 1657 values = rvalues + i*nmax; 1658 for (j=0; j<lens[i]; j++) { 1659 lrows[count++] = values[j] - base; 1660 } 1661 } 1662 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1663 ierr = PetscFree(lens);CHKERRQ(ierr); 1664 ierr = PetscFree(owner);CHKERRQ(ierr); 1665 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1666 1667 /* actually zap the local rows */ 1668 /* 1669 Zero the required rows. If the "diagonal block" of the matrix 1670 is square and the user wishes to set the diagonal we use separate 1671 code so that MatSetValues() is not called for each diagonal allocating 1672 new memory, thus calling lots of mallocs and slowing things down. 1673 1674 Contributed by: Matthew Knepley 1675 */ 1676 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1677 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1678 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1679 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1680 } else if (diag != 0.0) { 1681 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1682 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1683 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1684 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1685 } 1686 for (i=0; i<slen; i++) { 1687 row = lrows[i] + rstart_bs; 1688 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1689 } 1690 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1692 } else { 1693 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1694 } 1695 1696 ierr = PetscFree(lrows);CHKERRQ(ierr); 1697 1698 /* wait on sends */ 1699 if (nsends) { 1700 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1701 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1702 ierr = PetscFree(send_status);CHKERRQ(ierr); 1703 } 1704 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1705 ierr = PetscFree(svalues);CHKERRQ(ierr); 1706 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1712 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1713 { 1714 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 1722 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatEqual_MPIBAIJ" 1726 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1727 { 1728 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1729 Mat a,b,c,d; 1730 PetscTruth flg; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 a = matA->A; b = matA->B; 1735 c = matB->A; d = matB->B; 1736 1737 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1738 if (flg) { 1739 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1740 } 1741 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatCopy_MPIBAIJ" 1747 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1748 { 1749 PetscErrorCode ierr; 1750 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1751 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1752 1753 PetscFunctionBegin; 1754 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1755 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1756 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1757 } else { 1758 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1759 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1760 } 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1766 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1767 { 1768 PetscErrorCode ierr; 1769 1770 PetscFunctionBegin; 1771 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1772 PetscFunctionReturn(0); 1773 } 1774 1775 #include "petscblaslapack.h" 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1778 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1779 { 1780 PetscErrorCode ierr; 1781 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1782 PetscBLASInt bnz,one=1; 1783 Mat_SeqBAIJ *x,*y; 1784 1785 PetscFunctionBegin; 1786 if (str == SAME_NONZERO_PATTERN) { 1787 PetscScalar alpha = a; 1788 x = (Mat_SeqBAIJ *)xx->A->data; 1789 y = (Mat_SeqBAIJ *)yy->A->data; 1790 bnz = PetscBLASIntCast(x->nz); 1791 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1792 x = (Mat_SeqBAIJ *)xx->B->data; 1793 y = (Mat_SeqBAIJ *)yy->B->data; 1794 bnz = PetscBLASIntCast(x->nz); 1795 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1796 } else { 1797 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1798 } 1799 PetscFunctionReturn(0); 1800 } 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1804 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1805 { 1806 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1807 PetscErrorCode ierr; 1808 1809 PetscFunctionBegin; 1810 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1811 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1817 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1818 { 1819 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1820 PetscErrorCode ierr; 1821 1822 PetscFunctionBegin; 1823 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1824 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 /* -------------------------------------------------------------------*/ 1829 static struct _MatOps MatOps_Values = { 1830 MatSetValues_MPIBAIJ, 1831 MatGetRow_MPIBAIJ, 1832 MatRestoreRow_MPIBAIJ, 1833 MatMult_MPIBAIJ, 1834 /* 4*/ MatMultAdd_MPIBAIJ, 1835 MatMultTranspose_MPIBAIJ, 1836 MatMultTransposeAdd_MPIBAIJ, 1837 0, 1838 0, 1839 0, 1840 /*10*/ 0, 1841 0, 1842 0, 1843 0, 1844 MatTranspose_MPIBAIJ, 1845 /*15*/ MatGetInfo_MPIBAIJ, 1846 MatEqual_MPIBAIJ, 1847 MatGetDiagonal_MPIBAIJ, 1848 MatDiagonalScale_MPIBAIJ, 1849 MatNorm_MPIBAIJ, 1850 /*20*/ MatAssemblyBegin_MPIBAIJ, 1851 MatAssemblyEnd_MPIBAIJ, 1852 0, 1853 MatSetOption_MPIBAIJ, 1854 MatZeroEntries_MPIBAIJ, 1855 /*25*/ MatZeroRows_MPIBAIJ, 1856 0, 1857 0, 1858 0, 1859 0, 1860 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1861 0, 1862 0, 1863 0, 1864 0, 1865 /*35*/ MatDuplicate_MPIBAIJ, 1866 0, 1867 0, 1868 0, 1869 0, 1870 /*40*/ MatAXPY_MPIBAIJ, 1871 MatGetSubMatrices_MPIBAIJ, 1872 MatIncreaseOverlap_MPIBAIJ, 1873 MatGetValues_MPIBAIJ, 1874 MatCopy_MPIBAIJ, 1875 /*45*/ 0, 1876 MatScale_MPIBAIJ, 1877 0, 1878 0, 1879 0, 1880 /*50*/ 0, 1881 0, 1882 0, 1883 0, 1884 0, 1885 /*55*/ 0, 1886 0, 1887 MatSetUnfactored_MPIBAIJ, 1888 0, 1889 MatSetValuesBlocked_MPIBAIJ, 1890 /*60*/ 0, 1891 MatDestroy_MPIBAIJ, 1892 MatView_MPIBAIJ, 1893 0, 1894 0, 1895 /*65*/ 0, 1896 0, 1897 0, 1898 0, 1899 0, 1900 /*70*/ MatGetRowMaxAbs_MPIBAIJ, 1901 0, 1902 0, 1903 0, 1904 0, 1905 /*75*/ 0, 1906 0, 1907 0, 1908 0, 1909 0, 1910 /*80*/ 0, 1911 0, 1912 0, 1913 0, 1914 MatLoad_MPIBAIJ, 1915 /*85*/ 0, 1916 0, 1917 0, 1918 0, 1919 0, 1920 /*90*/ 0, 1921 0, 1922 0, 1923 0, 1924 0, 1925 /*95*/ 0, 1926 0, 1927 0, 1928 0, 1929 0, 1930 /*100*/0, 1931 0, 1932 0, 1933 0, 1934 0, 1935 /*105*/0, 1936 MatRealPart_MPIBAIJ, 1937 MatImaginaryPart_MPIBAIJ}; 1938 1939 1940 EXTERN_C_BEGIN 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1943 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1944 { 1945 PetscFunctionBegin; 1946 *a = ((Mat_MPIBAIJ *)A->data)->A; 1947 *iscopy = PETSC_FALSE; 1948 PetscFunctionReturn(0); 1949 } 1950 EXTERN_C_END 1951 1952 EXTERN_C_BEGIN 1953 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 1954 EXTERN_C_END 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 1958 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 1959 { 1960 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 1961 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 1962 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 1963 const PetscInt *JJ; 1964 PetscScalar *values; 1965 PetscErrorCode ierr; 1966 1967 PetscFunctionBegin; 1968 #if defined(PETSC_OPT_g) 1969 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 1970 #endif 1971 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1972 o_nnz = d_nnz + m; 1973 1974 for (i=0; i<m; i++) { 1975 nnz = Ii[i+1]- Ii[i]; 1976 JJ = J + Ii[i]; 1977 nnz_max = PetscMax(nnz_max,nnz); 1978 #if defined(PETSC_OPT_g) 1979 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 1980 #endif 1981 for (j=0; j<nnz; j++) { 1982 if (*JJ >= cstart) break; 1983 JJ++; 1984 } 1985 d = 0; 1986 for (; j<nnz; j++) { 1987 if (*JJ++ >= cend) break; 1988 d++; 1989 } 1990 d_nnz[i] = d; 1991 o_nnz[i] = nnz - d; 1992 } 1993 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1994 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1995 1996 if (v) values = (PetscScalar*)v; 1997 else { 1998 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1999 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2000 } 2001 2002 for (i=0; i<m; i++) { 2003 ii = i + rstart; 2004 nnz = Ii[i+1]- Ii[i]; 2005 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2006 } 2007 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2008 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2009 2010 if (!v) { 2011 ierr = PetscFree(values);CHKERRQ(ierr); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2018 /*@C 2019 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2020 (the default parallel PETSc format). 2021 2022 Collective on MPI_Comm 2023 2024 Input Parameters: 2025 + A - the matrix 2026 . i - the indices into j for the start of each local row (starts with zero) 2027 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2028 - v - optional values in the matrix 2029 2030 Level: developer 2031 2032 .keywords: matrix, aij, compressed row, sparse, parallel 2033 2034 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2035 @*/ 2036 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2037 { 2038 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2039 2040 PetscFunctionBegin; 2041 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2042 if (f) { 2043 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2044 } 2045 PetscFunctionReturn(0); 2046 } 2047 2048 EXTERN_C_BEGIN 2049 #undef __FUNCT__ 2050 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2051 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2052 { 2053 Mat_MPIBAIJ *b; 2054 PetscErrorCode ierr; 2055 PetscInt i; 2056 2057 PetscFunctionBegin; 2058 B->preallocated = PETSC_TRUE; 2059 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2060 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2061 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2062 2063 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2064 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2065 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2066 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2067 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2068 2069 B->rmap.bs = bs; 2070 B->cmap.bs = bs; 2071 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2072 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2073 2074 if (d_nnz) { 2075 for (i=0; i<B->rmap.n/bs; i++) { 2076 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2077 } 2078 } 2079 if (o_nnz) { 2080 for (i=0; i<B->rmap.n/bs; i++) { 2081 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2082 } 2083 } 2084 2085 b = (Mat_MPIBAIJ*)B->data; 2086 b->bs2 = bs*bs; 2087 b->mbs = B->rmap.n/bs; 2088 b->nbs = B->cmap.n/bs; 2089 b->Mbs = B->rmap.N/bs; 2090 b->Nbs = B->cmap.N/bs; 2091 2092 for (i=0; i<=b->size; i++) { 2093 b->rangebs[i] = B->rmap.range[i]/bs; 2094 } 2095 b->rstartbs = B->rmap.rstart/bs; 2096 b->rendbs = B->rmap.rend/bs; 2097 b->cstartbs = B->cmap.rstart/bs; 2098 b->cendbs = B->cmap.rend/bs; 2099 2100 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2101 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2102 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2103 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2104 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2105 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2106 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2107 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2108 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2109 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2110 2111 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2112 2113 PetscFunctionReturn(0); 2114 } 2115 EXTERN_C_END 2116 2117 EXTERN_C_BEGIN 2118 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2119 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2120 EXTERN_C_END 2121 2122 /*MC 2123 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2124 2125 Options Database Keys: 2126 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2127 . -mat_block_size <bs> - set the blocksize used to store the matrix 2128 - -mat_use_hash_table <fact> 2129 2130 Level: beginner 2131 2132 .seealso: MatCreateMPIBAIJ 2133 M*/ 2134 2135 EXTERN_C_BEGIN 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatCreate_MPIBAIJ" 2138 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2139 { 2140 Mat_MPIBAIJ *b; 2141 PetscErrorCode ierr; 2142 PetscTruth flg; 2143 2144 PetscFunctionBegin; 2145 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2146 B->data = (void*)b; 2147 2148 2149 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2150 B->mapping = 0; 2151 B->factor = 0; 2152 B->assembled = PETSC_FALSE; 2153 2154 B->insertmode = NOT_SET_VALUES; 2155 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2156 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2157 2158 /* build local table of row and column ownerships */ 2159 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2160 2161 /* build cache for off array entries formed */ 2162 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2163 b->donotstash = PETSC_FALSE; 2164 b->colmap = PETSC_NULL; 2165 b->garray = PETSC_NULL; 2166 b->roworiented = PETSC_TRUE; 2167 2168 /* stuff used in block assembly */ 2169 b->barray = 0; 2170 2171 /* stuff used for matrix vector multiply */ 2172 b->lvec = 0; 2173 b->Mvctx = 0; 2174 2175 /* stuff for MatGetRow() */ 2176 b->rowindices = 0; 2177 b->rowvalues = 0; 2178 b->getrowactive = PETSC_FALSE; 2179 2180 /* hash table stuff */ 2181 b->ht = 0; 2182 b->hd = 0; 2183 b->ht_size = 0; 2184 b->ht_flag = PETSC_FALSE; 2185 b->ht_fact = 0; 2186 b->ht_total_ct = 0; 2187 b->ht_insert_ct = 0; 2188 2189 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2190 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2191 if (flg) { 2192 PetscReal fact = 1.39; 2193 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2194 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2195 if (fact <= 1.0) fact = 1.39; 2196 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2197 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2198 } 2199 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2200 2201 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2202 "MatStoreValues_MPIBAIJ", 2203 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2204 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2205 "MatRetrieveValues_MPIBAIJ", 2206 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2207 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2208 "MatGetDiagonalBlock_MPIBAIJ", 2209 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2210 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2211 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2212 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2213 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2214 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2215 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2216 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2217 "MatDiagonalScaleLocal_MPIBAIJ", 2218 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2219 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2220 "MatSetHashTableFactor_MPIBAIJ", 2221 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2222 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2223 PetscFunctionReturn(0); 2224 } 2225 EXTERN_C_END 2226 2227 /*MC 2228 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2229 2230 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2231 and MATMPIBAIJ otherwise. 2232 2233 Options Database Keys: 2234 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2235 2236 Level: beginner 2237 2238 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2239 M*/ 2240 2241 EXTERN_C_BEGIN 2242 #undef __FUNCT__ 2243 #define __FUNCT__ "MatCreate_BAIJ" 2244 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2245 { 2246 PetscErrorCode ierr; 2247 PetscMPIInt size; 2248 2249 PetscFunctionBegin; 2250 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2251 if (size == 1) { 2252 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2253 } else { 2254 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2255 } 2256 PetscFunctionReturn(0); 2257 } 2258 EXTERN_C_END 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2262 /*@C 2263 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2264 (block compressed row). For good matrix assembly performance 2265 the user should preallocate the matrix storage by setting the parameters 2266 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2267 performance can be increased by more than a factor of 50. 2268 2269 Collective on Mat 2270 2271 Input Parameters: 2272 + A - the matrix 2273 . bs - size of blockk 2274 . d_nz - number of block nonzeros per block row in diagonal portion of local 2275 submatrix (same for all local rows) 2276 . d_nnz - array containing the number of block nonzeros in the various block rows 2277 of the in diagonal portion of the local (possibly different for each block 2278 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2279 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2280 submatrix (same for all local rows). 2281 - o_nnz - array containing the number of nonzeros in the various block rows of the 2282 off-diagonal portion of the local submatrix (possibly different for 2283 each block row) or PETSC_NULL. 2284 2285 If the *_nnz parameter is given then the *_nz parameter is ignored 2286 2287 Options Database Keys: 2288 + -mat_block_size - size of the blocks to use 2289 - -mat_use_hash_table <fact> 2290 2291 Notes: 2292 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2293 than it must be used on all processors that share the object for that argument. 2294 2295 Storage Information: 2296 For a square global matrix we define each processor's diagonal portion 2297 to be its local rows and the corresponding columns (a square submatrix); 2298 each processor's off-diagonal portion encompasses the remainder of the 2299 local matrix (a rectangular submatrix). 2300 2301 The user can specify preallocated storage for the diagonal part of 2302 the local submatrix with either d_nz or d_nnz (not both). Set 2303 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2304 memory allocation. Likewise, specify preallocated storage for the 2305 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2306 2307 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2308 the figure below we depict these three local rows and all columns (0-11). 2309 2310 .vb 2311 0 1 2 3 4 5 6 7 8 9 10 11 2312 ------------------- 2313 row 3 | o o o d d d o o o o o o 2314 row 4 | o o o d d d o o o o o o 2315 row 5 | o o o d d d o o o o o o 2316 ------------------- 2317 .ve 2318 2319 Thus, any entries in the d locations are stored in the d (diagonal) 2320 submatrix, and any entries in the o locations are stored in the 2321 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2322 stored simply in the MATSEQBAIJ format for compressed row storage. 2323 2324 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2325 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2326 In general, for PDE problems in which most nonzeros are near the diagonal, 2327 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2328 or you will get TERRIBLE performance; see the users' manual chapter on 2329 matrices. 2330 2331 You can call MatGetInfo() to get information on how effective the preallocation was; 2332 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2333 You can also run with the option -info and look for messages with the string 2334 malloc in them to see if additional memory allocation was needed. 2335 2336 Level: intermediate 2337 2338 .keywords: matrix, block, aij, compressed row, sparse, parallel 2339 2340 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2341 @*/ 2342 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2343 { 2344 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2345 2346 PetscFunctionBegin; 2347 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2348 if (f) { 2349 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatCreateMPIBAIJ" 2356 /*@C 2357 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2358 (block compressed row). For good matrix assembly performance 2359 the user should preallocate the matrix storage by setting the parameters 2360 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2361 performance can be increased by more than a factor of 50. 2362 2363 Collective on MPI_Comm 2364 2365 Input Parameters: 2366 + comm - MPI communicator 2367 . bs - size of blockk 2368 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2369 This value should be the same as the local size used in creating the 2370 y vector for the matrix-vector product y = Ax. 2371 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2372 This value should be the same as the local size used in creating the 2373 x vector for the matrix-vector product y = Ax. 2374 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2375 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2376 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2377 submatrix (same for all local rows) 2378 . d_nnz - array containing the number of nonzero blocks in the various block rows 2379 of the in diagonal portion of the local (possibly different for each block 2380 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2381 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2382 submatrix (same for all local rows). 2383 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2384 off-diagonal portion of the local submatrix (possibly different for 2385 each block row) or PETSC_NULL. 2386 2387 Output Parameter: 2388 . A - the matrix 2389 2390 Options Database Keys: 2391 + -mat_block_size - size of the blocks to use 2392 - -mat_use_hash_table <fact> 2393 2394 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2395 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 2396 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 2397 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2398 2399 Notes: 2400 If the *_nnz parameter is given then the *_nz parameter is ignored 2401 2402 A nonzero block is any block that as 1 or more nonzeros in it 2403 2404 The user MUST specify either the local or global matrix dimensions 2405 (possibly both). 2406 2407 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2408 than it must be used on all processors that share the object for that argument. 2409 2410 Storage Information: 2411 For a square global matrix we define each processor's diagonal portion 2412 to be its local rows and the corresponding columns (a square submatrix); 2413 each processor's off-diagonal portion encompasses the remainder of the 2414 local matrix (a rectangular submatrix). 2415 2416 The user can specify preallocated storage for the diagonal part of 2417 the local submatrix with either d_nz or d_nnz (not both). Set 2418 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2419 memory allocation. Likewise, specify preallocated storage for the 2420 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2421 2422 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2423 the figure below we depict these three local rows and all columns (0-11). 2424 2425 .vb 2426 0 1 2 3 4 5 6 7 8 9 10 11 2427 ------------------- 2428 row 3 | o o o d d d o o o o o o 2429 row 4 | o o o d d d o o o o o o 2430 row 5 | o o o d d d o o o o o o 2431 ------------------- 2432 .ve 2433 2434 Thus, any entries in the d locations are stored in the d (diagonal) 2435 submatrix, and any entries in the o locations are stored in the 2436 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2437 stored simply in the MATSEQBAIJ format for compressed row storage. 2438 2439 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2440 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2441 In general, for PDE problems in which most nonzeros are near the diagonal, 2442 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2443 or you will get TERRIBLE performance; see the users' manual chapter on 2444 matrices. 2445 2446 Level: intermediate 2447 2448 .keywords: matrix, block, aij, compressed row, sparse, parallel 2449 2450 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2451 @*/ 2452 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2453 { 2454 PetscErrorCode ierr; 2455 PetscMPIInt size; 2456 2457 PetscFunctionBegin; 2458 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2459 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2460 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2461 if (size > 1) { 2462 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2463 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2464 } else { 2465 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2466 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2467 } 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2473 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2474 { 2475 Mat mat; 2476 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2477 PetscErrorCode ierr; 2478 PetscInt len=0; 2479 2480 PetscFunctionBegin; 2481 *newmat = 0; 2482 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2483 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2484 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2485 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2486 2487 mat->factor = matin->factor; 2488 mat->preallocated = PETSC_TRUE; 2489 mat->assembled = PETSC_TRUE; 2490 mat->insertmode = NOT_SET_VALUES; 2491 2492 a = (Mat_MPIBAIJ*)mat->data; 2493 mat->rmap.bs = matin->rmap.bs; 2494 a->bs2 = oldmat->bs2; 2495 a->mbs = oldmat->mbs; 2496 a->nbs = oldmat->nbs; 2497 a->Mbs = oldmat->Mbs; 2498 a->Nbs = oldmat->Nbs; 2499 2500 ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2501 ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2502 2503 a->size = oldmat->size; 2504 a->rank = oldmat->rank; 2505 a->donotstash = oldmat->donotstash; 2506 a->roworiented = oldmat->roworiented; 2507 a->rowindices = 0; 2508 a->rowvalues = 0; 2509 a->getrowactive = PETSC_FALSE; 2510 a->barray = 0; 2511 a->rstartbs = oldmat->rstartbs; 2512 a->rendbs = oldmat->rendbs; 2513 a->cstartbs = oldmat->cstartbs; 2514 a->cendbs = oldmat->cendbs; 2515 2516 /* hash table stuff */ 2517 a->ht = 0; 2518 a->hd = 0; 2519 a->ht_size = 0; 2520 a->ht_flag = oldmat->ht_flag; 2521 a->ht_fact = oldmat->ht_fact; 2522 a->ht_total_ct = 0; 2523 a->ht_insert_ct = 0; 2524 2525 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2526 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2527 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2528 if (oldmat->colmap) { 2529 #if defined (PETSC_USE_CTABLE) 2530 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2531 #else 2532 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2533 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2534 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2535 #endif 2536 } else a->colmap = 0; 2537 2538 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2539 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2540 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2541 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2542 } else a->garray = 0; 2543 2544 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2545 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2546 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2547 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2548 2549 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2550 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2551 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2552 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2553 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2554 *newmat = mat; 2555 2556 PetscFunctionReturn(0); 2557 } 2558 2559 #include "petscsys.h" 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatLoad_MPIBAIJ" 2563 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2564 { 2565 Mat A; 2566 PetscErrorCode ierr; 2567 int fd; 2568 PetscInt i,nz,j,rstart,rend; 2569 PetscScalar *vals,*buf; 2570 MPI_Comm comm = ((PetscObject)viewer)->comm; 2571 MPI_Status status; 2572 PetscMPIInt rank,size,maxnz; 2573 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2574 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2575 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2576 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2577 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2578 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2579 2580 PetscFunctionBegin; 2581 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 2582 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2583 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2584 2585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2586 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2587 if (!rank) { 2588 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2589 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2590 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2591 } 2592 2593 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2594 M = header[1]; N = header[2]; 2595 2596 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2597 2598 /* 2599 This code adds extra rows to make sure the number of rows is 2600 divisible by the blocksize 2601 */ 2602 Mbs = M/bs; 2603 extra_rows = bs - M + bs*Mbs; 2604 if (extra_rows == bs) extra_rows = 0; 2605 else Mbs++; 2606 if (extra_rows && !rank) { 2607 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2608 } 2609 2610 /* determine ownership of all rows */ 2611 mbs = Mbs/size + ((Mbs % size) > rank); 2612 m = mbs*bs; 2613 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2614 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2615 2616 /* process 0 needs enough room for process with most rows */ 2617 if (!rank) { 2618 mmax = rowners[1]; 2619 for (i=2; i<size; i++) { 2620 mmax = PetscMax(mmax,rowners[i]); 2621 } 2622 mmax*=bs; 2623 } else mmax = m; 2624 2625 rowners[0] = 0; 2626 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2627 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2628 rstart = rowners[rank]; 2629 rend = rowners[rank+1]; 2630 2631 /* distribute row lengths to all processors */ 2632 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2633 if (!rank) { 2634 mend = m; 2635 if (size == 1) mend = mend - extra_rows; 2636 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2637 for (j=mend; j<m; j++) locrowlens[j] = 1; 2638 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2639 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2640 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2641 for (j=0; j<m; j++) { 2642 procsnz[0] += locrowlens[j]; 2643 } 2644 for (i=1; i<size; i++) { 2645 mend = browners[i+1] - browners[i]; 2646 if (i == size-1) mend = mend - extra_rows; 2647 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2648 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2649 /* calculate the number of nonzeros on each processor */ 2650 for (j=0; j<browners[i+1]-browners[i]; j++) { 2651 procsnz[i] += rowlengths[j]; 2652 } 2653 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2654 } 2655 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2656 } else { 2657 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2658 } 2659 2660 if (!rank) { 2661 /* determine max buffer needed and allocate it */ 2662 maxnz = procsnz[0]; 2663 for (i=1; i<size; i++) { 2664 maxnz = PetscMax(maxnz,procsnz[i]); 2665 } 2666 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2667 2668 /* read in my part of the matrix column indices */ 2669 nz = procsnz[0]; 2670 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2671 mycols = ibuf; 2672 if (size == 1) nz -= extra_rows; 2673 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2674 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2675 2676 /* read in every ones (except the last) and ship off */ 2677 for (i=1; i<size-1; i++) { 2678 nz = procsnz[i]; 2679 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2680 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2681 } 2682 /* read in the stuff for the last proc */ 2683 if (size != 1) { 2684 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2685 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2686 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2687 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2688 } 2689 ierr = PetscFree(cols);CHKERRQ(ierr); 2690 } else { 2691 /* determine buffer space needed for message */ 2692 nz = 0; 2693 for (i=0; i<m; i++) { 2694 nz += locrowlens[i]; 2695 } 2696 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2697 mycols = ibuf; 2698 /* receive message of column indices*/ 2699 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2700 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2701 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2702 } 2703 2704 /* loop over local rows, determining number of off diagonal entries */ 2705 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2706 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2707 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2708 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2709 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2710 rowcount = 0; nzcount = 0; 2711 for (i=0; i<mbs; i++) { 2712 dcount = 0; 2713 odcount = 0; 2714 for (j=0; j<bs; j++) { 2715 kmax = locrowlens[rowcount]; 2716 for (k=0; k<kmax; k++) { 2717 tmp = mycols[nzcount++]/bs; 2718 if (!mask[tmp]) { 2719 mask[tmp] = 1; 2720 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2721 else masked1[dcount++] = tmp; 2722 } 2723 } 2724 rowcount++; 2725 } 2726 2727 dlens[i] = dcount; 2728 odlens[i] = odcount; 2729 2730 /* zero out the mask elements we set */ 2731 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2732 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2733 } 2734 2735 /* create our matrix */ 2736 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2737 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2738 ierr = MatSetType(A,type);CHKERRQ(ierr) 2739 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2740 2741 if (!rank) { 2742 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2743 /* read in my part of the matrix numerical values */ 2744 nz = procsnz[0]; 2745 vals = buf; 2746 mycols = ibuf; 2747 if (size == 1) nz -= extra_rows; 2748 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2749 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2750 2751 /* insert into matrix */ 2752 jj = rstart*bs; 2753 for (i=0; i<m; i++) { 2754 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2755 mycols += locrowlens[i]; 2756 vals += locrowlens[i]; 2757 jj++; 2758 } 2759 /* read in other processors (except the last one) and ship out */ 2760 for (i=1; i<size-1; i++) { 2761 nz = procsnz[i]; 2762 vals = buf; 2763 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2764 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2765 } 2766 /* the last proc */ 2767 if (size != 1){ 2768 nz = procsnz[i] - extra_rows; 2769 vals = buf; 2770 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2771 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2772 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2773 } 2774 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2775 } else { 2776 /* receive numeric values */ 2777 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2778 2779 /* receive message of values*/ 2780 vals = buf; 2781 mycols = ibuf; 2782 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2783 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2784 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2785 2786 /* insert into matrix */ 2787 jj = rstart*bs; 2788 for (i=0; i<m; i++) { 2789 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2790 mycols += locrowlens[i]; 2791 vals += locrowlens[i]; 2792 jj++; 2793 } 2794 } 2795 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2796 ierr = PetscFree(buf);CHKERRQ(ierr); 2797 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2798 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2799 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2800 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2801 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2802 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2803 2804 *newmat = A; 2805 PetscFunctionReturn(0); 2806 } 2807 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2810 /*@ 2811 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2812 2813 Input Parameters: 2814 . mat - the matrix 2815 . fact - factor 2816 2817 Collective on Mat 2818 2819 Level: advanced 2820 2821 Notes: 2822 This can also be set by the command line option: -mat_use_hash_table <fact> 2823 2824 .keywords: matrix, hashtable, factor, HT 2825 2826 .seealso: MatSetOption() 2827 @*/ 2828 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2829 { 2830 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2831 2832 PetscFunctionBegin; 2833 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2834 if (f) { 2835 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2836 } 2837 PetscFunctionReturn(0); 2838 } 2839 2840 EXTERN_C_BEGIN 2841 #undef __FUNCT__ 2842 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2843 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2844 { 2845 Mat_MPIBAIJ *baij; 2846 2847 PetscFunctionBegin; 2848 baij = (Mat_MPIBAIJ*)mat->data; 2849 baij->ht_fact = fact; 2850 PetscFunctionReturn(0); 2851 } 2852 EXTERN_C_END 2853 2854 #undef __FUNCT__ 2855 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2856 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2857 { 2858 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2859 PetscFunctionBegin; 2860 *Ad = a->A; 2861 *Ao = a->B; 2862 *colmap = a->garray; 2863 PetscFunctionReturn(0); 2864 } 2865 2866 /* 2867 Special version for direct calls from Fortran (to eliminate two function call overheads 2868 */ 2869 #if defined(PETSC_HAVE_FORTRAN_CAPS) 2870 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 2871 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 2872 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 2873 #endif 2874 2875 #undef __FUNCT__ 2876 #define __FUNCT__ "matmpibiajsetvaluesblocked" 2877 /*@C 2878 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 2879 2880 Collective on Mat 2881 2882 Input Parameters: 2883 + mat - the matrix 2884 . min - number of input rows 2885 . im - input rows 2886 . nin - number of input columns 2887 . in - input columns 2888 . v - numerical values input 2889 - addvin - INSERT_VALUES or ADD_VALUES 2890 2891 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 2892 2893 Level: advanced 2894 2895 .seealso: MatSetValuesBlocked() 2896 @*/ 2897 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 2898 { 2899 /* convert input arguments to C version */ 2900 Mat mat = *matin; 2901 PetscInt m = *min, n = *nin; 2902 InsertMode addv = *addvin; 2903 2904 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2905 const MatScalar *value; 2906 MatScalar *barray=baij->barray; 2907 PetscTruth roworiented = baij->roworiented; 2908 PetscErrorCode ierr; 2909 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 2910 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 2911 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 2912 2913 PetscFunctionBegin; 2914 /* tasks normally handled by MatSetValuesBlocked() */ 2915 if (mat->insertmode == NOT_SET_VALUES) { 2916 mat->insertmode = addv; 2917 } 2918 #if defined(PETSC_USE_DEBUG) 2919 else if (mat->insertmode != addv) { 2920 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 2921 } 2922 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2923 #endif 2924 if (mat->assembled) { 2925 mat->was_assembled = PETSC_TRUE; 2926 mat->assembled = PETSC_FALSE; 2927 } 2928 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 2929 2930 2931 if(!barray) { 2932 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 2933 baij->barray = barray; 2934 } 2935 2936 if (roworiented) { 2937 stepval = (n-1)*bs; 2938 } else { 2939 stepval = (m-1)*bs; 2940 } 2941 for (i=0; i<m; i++) { 2942 if (im[i] < 0) continue; 2943 #if defined(PETSC_USE_DEBUG) 2944 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 2945 #endif 2946 if (im[i] >= rstart && im[i] < rend) { 2947 row = im[i] - rstart; 2948 for (j=0; j<n; j++) { 2949 /* If NumCol = 1 then a copy is not required */ 2950 if ((roworiented) && (n == 1)) { 2951 barray = (MatScalar*)v + i*bs2; 2952 } else if((!roworiented) && (m == 1)) { 2953 barray = (MatScalar*)v + j*bs2; 2954 } else { /* Here a copy is required */ 2955 if (roworiented) { 2956 value = v + i*(stepval+bs)*bs + j*bs; 2957 } else { 2958 value = v + j*(stepval+bs)*bs + i*bs; 2959 } 2960 for (ii=0; ii<bs; ii++,value+=stepval) { 2961 for (jj=0; jj<bs; jj++) { 2962 *barray++ = *value++; 2963 } 2964 } 2965 barray -=bs2; 2966 } 2967 2968 if (in[j] >= cstart && in[j] < cend){ 2969 col = in[j] - cstart; 2970 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 2971 } 2972 else if (in[j] < 0) continue; 2973 #if defined(PETSC_USE_DEBUG) 2974 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 2975 #endif 2976 else { 2977 if (mat->was_assembled) { 2978 if (!baij->colmap) { 2979 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2980 } 2981 2982 #if defined(PETSC_USE_DEBUG) 2983 #if defined (PETSC_USE_CTABLE) 2984 { PetscInt data; 2985 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 2986 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 2987 } 2988 #else 2989 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 2990 #endif 2991 #endif 2992 #if defined (PETSC_USE_CTABLE) 2993 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 2994 col = (col - 1)/bs; 2995 #else 2996 col = (baij->colmap[in[j]] - 1)/bs; 2997 #endif 2998 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 2999 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3000 col = in[j]; 3001 } 3002 } 3003 else col = in[j]; 3004 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3005 } 3006 } 3007 } else { 3008 if (!baij->donotstash) { 3009 if (roworiented) { 3010 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3011 } else { 3012 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3013 } 3014 } 3015 } 3016 } 3017 3018 /* task normally handled by MatSetValuesBlocked() */ 3019 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3020 PetscFunctionReturn(0); 3021 } 3022