1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 19 { 20 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt i,*idxb = 0; 23 PetscScalar *va,*vb; 24 Vec vtmp; 25 26 PetscFunctionBegin; 27 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 28 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 29 if (idx) { 30 for (i=0; i<A->cmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 31 } 32 33 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 34 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 35 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 36 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 37 38 for (i=0; i<A->rmap->n; i++){ 39 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 40 } 41 42 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 43 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 44 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 45 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 46 PetscFunctionReturn(0); 47 } 48 49 EXTERN_C_BEGIN 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 53 { 54 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 59 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 60 PetscFunctionReturn(0); 61 } 62 EXTERN_C_END 63 64 EXTERN_C_BEGIN 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 68 { 69 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 74 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 EXTERN_C_END 78 79 /* 80 Local utility routine that creates a mapping from the global column 81 number to the local number in the off-diagonal part of the local 82 storage of the matrix. This is done in a non scable way since the 83 length of colmap equals the global matrix length. 84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined (PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++){ 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define CHUNKSIZE 10 110 111 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 112 { \ 113 \ 114 brow = row/bs; \ 115 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 116 rmax = aimax[brow]; nrow = ailen[brow]; \ 117 bcol = col/bs; \ 118 ridx = row % bs; cidx = col % bs; \ 119 low = 0; high = nrow; \ 120 while (high-low > 3) { \ 121 t = (low+high)/2; \ 122 if (rp[t] > bcol) high = t; \ 123 else low = t; \ 124 } \ 125 for (_i=low; _i<high; _i++) { \ 126 if (rp[_i] > bcol) break; \ 127 if (rp[_i] == bcol) { \ 128 bap = ap + bs2*_i + bs*cidx + ridx; \ 129 if (addv == ADD_VALUES) *bap += value; \ 130 else *bap = value; \ 131 goto a_noinsert; \ 132 } \ 133 } \ 134 if (a->nonew == 1) goto a_noinsert; \ 135 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 136 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 137 N = nrow++ - 1; \ 138 /* shift up all the later entries in this row */ \ 139 for (ii=N; ii>=_i; ii--) { \ 140 rp[ii+1] = rp[ii]; \ 141 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 142 } \ 143 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 144 rp[_i] = bcol; \ 145 ap[bs2*_i + bs*cidx + ridx] = value; \ 146 a_noinsert:; \ 147 ailen[brow] = nrow; \ 148 } 149 150 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 151 { \ 152 brow = row/bs; \ 153 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 154 rmax = bimax[brow]; nrow = bilen[brow]; \ 155 bcol = col/bs; \ 156 ridx = row % bs; cidx = col % bs; \ 157 low = 0; high = nrow; \ 158 while (high-low > 3) { \ 159 t = (low+high)/2; \ 160 if (rp[t] > bcol) high = t; \ 161 else low = t; \ 162 } \ 163 for (_i=low; _i<high; _i++) { \ 164 if (rp[_i] > bcol) break; \ 165 if (rp[_i] == bcol) { \ 166 bap = ap + bs2*_i + bs*cidx + ridx; \ 167 if (addv == ADD_VALUES) *bap += value; \ 168 else *bap = value; \ 169 goto b_noinsert; \ 170 } \ 171 } \ 172 if (b->nonew == 1) goto b_noinsert; \ 173 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 174 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 175 CHKMEMQ;\ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for (ii=N; ii>=_i; ii--) { \ 179 rp[ii+1] = rp[ii]; \ 180 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 181 } \ 182 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatSetValues_MPIBAIJ" 191 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 194 MatScalar value; 195 PetscTruth roworiented = baij->roworiented; 196 PetscErrorCode ierr; 197 PetscInt i,j,row,col; 198 PetscInt rstart_orig=mat->rmap->rstart; 199 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 200 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 201 202 /* Some Variables required in the macro */ 203 Mat A = baij->A; 204 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 205 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 206 MatScalar *aa=a->a; 207 208 Mat B = baij->B; 209 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 210 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 211 MatScalar *ba=b->a; 212 213 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 214 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 215 MatScalar *ap,*bap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_DEBUG) 221 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 222 #endif 223 if (im[i] >= rstart_orig && im[i] < rend_orig) { 224 row = im[i] - rstart_orig; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart_orig && in[j] < cend_orig){ 227 col = in[j] - cstart_orig; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } else if (in[j] < 0) continue; 232 #if defined(PETSC_USE_DEBUG) 233 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);} 234 #endif 235 else { 236 if (mat->was_assembled) { 237 if (!baij->colmap) { 238 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 239 } 240 #if defined (PETSC_USE_CTABLE) 241 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 242 col = col - 1; 243 #else 244 col = baij->colmap[in[j]/bs] - 1; 245 #endif 246 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 247 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 248 col = in[j]; 249 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 250 B = baij->B; 251 b = (Mat_SeqBAIJ*)(B)->data; 252 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 253 ba=b->a; 254 } else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (!baij->donotstash) { 263 if (roworiented) { 264 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 265 } else { 266 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 267 } 268 } 269 } 270 } 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 277 { 278 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 279 const PetscScalar *value; 280 MatScalar *barray=baij->barray; 281 PetscTruth roworiented = baij->roworiented; 282 PetscErrorCode ierr; 283 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 284 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 285 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 286 287 PetscFunctionBegin; 288 if(!barray) { 289 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 290 baij->barray = barray; 291 } 292 293 if (roworiented) { 294 stepval = (n-1)*bs; 295 } else { 296 stepval = (m-1)*bs; 297 } 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + i*(stepval+bs)*bs + j*bs; 314 } else { 315 value = v + j*(stepval+bs)*bs + i*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=stepval) { 318 for (jj=0; jj<bs; jj++) { 319 *barray++ = *value++; 320 } 321 } 322 barray -=bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend){ 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } 329 else if (in[j] < 0) continue; 330 #if defined(PETSC_USE_DEBUG) 331 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 332 #endif 333 else { 334 if (mat->was_assembled) { 335 if (!baij->colmap) { 336 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 337 } 338 339 #if defined(PETSC_USE_DEBUG) 340 #if defined (PETSC_USE_CTABLE) 341 { PetscInt data; 342 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 343 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 344 } 345 #else 346 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 347 #endif 348 #endif 349 #if defined (PETSC_USE_CTABLE) 350 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 351 col = (col - 1)/bs; 352 #else 353 col = (baij->colmap[in[j]] - 1)/bs; 354 #endif 355 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 356 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 357 col = in[j]; 358 } 359 } 360 else col = in[j]; 361 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 362 } 363 } 364 } else { 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 384 { 385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 386 PetscTruth roworiented = baij->roworiented; 387 PetscErrorCode ierr; 388 PetscInt i,j,row,col; 389 PetscInt rstart_orig=mat->rmap->rstart; 390 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 391 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 392 PetscReal tmp; 393 MatScalar **HD = baij->hd,value; 394 #if defined(PETSC_USE_DEBUG) 395 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 396 #endif 397 398 PetscFunctionBegin; 399 400 for (i=0; i<m; i++) { 401 #if defined(PETSC_USE_DEBUG) 402 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 403 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscTruth roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 584 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 662 tmp2 = tmp + mat->cmap->N; 663 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 664 v = amat->a; jj = amat->j; 665 for (i=0; i<amat->nz; i++) { 666 for (j=0; j<bs; j++){ 667 col = bs*(cstart + *jj) + j; /* column index */ 668 for (row=0; row<bs; row++){ 669 tmp[col] += PetscAbsScalar(*v); v++; 670 } 671 } 672 jj++; 673 } 674 v = bmat->a; jj = bmat->j; 675 for (i=0; i<bmat->nz; i++) { 676 for (j=0; j<bs; j++){ 677 col = bs*garray[*jj] + j; 678 for (row=0; row<bs; row++){ 679 tmp[col] += PetscAbsScalar(*v); v++; 680 } 681 } 682 jj++; 683 } 684 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 685 *nrm = 0.0; 686 for (j=0; j<mat->cmap->N; j++) { 687 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 688 } 689 ierr = PetscFree(tmp);CHKERRQ(ierr); 690 } else if (type == NORM_INFINITY) { /* max row sum */ 691 PetscReal *sums; 692 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 693 sum = 0.0; 694 for (j=0; j<amat->mbs; j++) { 695 for (row=0; row<bs; row++) sums[row] = 0.0; 696 v = amat->a + bs2*amat->i[j]; 697 nz = amat->i[j+1]-amat->i[j]; 698 for (i=0; i<nz; i++) { 699 for (col=0; col<bs; col++){ 700 for (row=0; row<bs; row++){ 701 sums[row] += PetscAbsScalar(*v); v++; 702 } 703 } 704 } 705 v = bmat->a + bs2*bmat->i[j]; 706 nz = bmat->i[j+1]-bmat->i[j]; 707 for (i=0; i<nz; i++) { 708 for (col=0; col<bs; col++){ 709 for (row=0; row<bs; row++){ 710 sums[row] += PetscAbsScalar(*v); v++; 711 } 712 } 713 } 714 for (row=0; row<bs; row++){ 715 if (sums[row] > sum) sum = sums[row]; 716 } 717 } 718 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 719 ierr = PetscFree(sums);CHKERRQ(ierr); 720 } else { 721 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 722 } 723 } 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Creates the hash table, and sets the table 729 This table is created only once. 730 If new entried need to be added to the matrix 731 then the hash table has to be destroyed and 732 recreated. 733 */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 737 { 738 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 739 Mat A = baij->A,B=baij->B; 740 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 741 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 742 PetscErrorCode ierr; 743 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 744 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 745 PetscInt *HT,key; 746 MatScalar **HD; 747 PetscReal tmp; 748 #if defined(PETSC_USE_INFO) 749 PetscInt ct=0,max=0; 750 #endif 751 752 PetscFunctionBegin; 753 baij->ht_size=(PetscInt)(factor*nz); 754 size = baij->ht_size; 755 756 if (baij->ht) { 757 PetscFunctionReturn(0); 758 } 759 760 /* Allocate Memory for Hash Table */ 761 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 762 baij->ht = (PetscInt*)(baij->hd + size); 763 HD = baij->hd; 764 HT = baij->ht; 765 766 767 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 768 769 770 /* Loop Over A */ 771 for (i=0; i<a->mbs; i++) { 772 for (j=ai[i]; j<ai[i+1]; j++) { 773 row = i+rstart; 774 col = aj[j]+cstart; 775 776 key = row*Nbs + col + 1; 777 h1 = HASH(size,key,tmp); 778 for (k=0; k<size; k++){ 779 if (!HT[(h1+k)%size]) { 780 HT[(h1+k)%size] = key; 781 HD[(h1+k)%size] = a->a + j*bs2; 782 break; 783 #if defined(PETSC_USE_INFO) 784 } else { 785 ct++; 786 #endif 787 } 788 } 789 #if defined(PETSC_USE_INFO) 790 if (k> max) max = k; 791 #endif 792 } 793 } 794 /* Loop Over B */ 795 for (i=0; i<b->mbs; i++) { 796 for (j=bi[i]; j<bi[i+1]; j++) { 797 row = i+rstart; 798 col = garray[bj[j]]; 799 key = row*Nbs + col + 1; 800 h1 = HASH(size,key,tmp); 801 for (k=0; k<size; k++){ 802 if (!HT[(h1+k)%size]) { 803 HT[(h1+k)%size] = key; 804 HD[(h1+k)%size] = b->a + j*bs2; 805 break; 806 #if defined(PETSC_USE_INFO) 807 } else { 808 ct++; 809 #endif 810 } 811 } 812 #if defined(PETSC_USE_INFO) 813 if (k> max) max = k; 814 #endif 815 } 816 } 817 818 /* Print Summary */ 819 #if defined(PETSC_USE_INFO) 820 for (i=0,j=0; i<size; i++) { 821 if (HT[i]) {j++;} 822 } 823 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 824 #endif 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNCT__ 829 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 830 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 831 { 832 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 833 PetscErrorCode ierr; 834 PetscInt nstash,reallocs; 835 InsertMode addv; 836 837 PetscFunctionBegin; 838 if (baij->donotstash) { 839 PetscFunctionReturn(0); 840 } 841 842 /* make sure all processors are either in INSERTMODE or ADDMODE */ 843 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 844 if (addv == (ADD_VALUES|INSERT_VALUES)) { 845 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 846 } 847 mat->insertmode = addv; /* in case this processor had no cache */ 848 849 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 850 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 851 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 852 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 853 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 854 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 860 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 861 { 862 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 863 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 864 PetscErrorCode ierr; 865 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 866 PetscInt *row,*col; 867 PetscTruth r1,r2,r3,other_disassembled; 868 MatScalar *val; 869 InsertMode addv = mat->insertmode; 870 PetscMPIInt n; 871 872 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 873 PetscFunctionBegin; 874 if (!baij->donotstash) { 875 while (1) { 876 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 877 if (!flg) break; 878 879 for (i=0; i<n;) { 880 /* Now identify the consecutive vals belonging to the same row */ 881 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 882 if (j < n) ncols = j-i; 883 else ncols = n-i; 884 /* Now assemble all these values with a single function call */ 885 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 886 i = j; 887 } 888 } 889 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 890 /* Now process the block-stash. Since the values are stashed column-oriented, 891 set the roworiented flag to column oriented, and after MatSetValues() 892 restore the original flags */ 893 r1 = baij->roworiented; 894 r2 = a->roworiented; 895 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 896 baij->roworiented = PETSC_FALSE; 897 a->roworiented = PETSC_FALSE; 898 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 899 while (1) { 900 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 901 if (!flg) break; 902 903 for (i=0; i<n;) { 904 /* Now identify the consecutive vals belonging to the same row */ 905 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 906 if (j < n) ncols = j-i; 907 else ncols = n-i; 908 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 909 i = j; 910 } 911 } 912 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 913 baij->roworiented = r1; 914 a->roworiented = r2; 915 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 916 } 917 918 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 919 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 920 921 /* determine if any processor has disassembled, if so we must 922 also disassemble ourselfs, in order that we may reassemble. */ 923 /* 924 if nonzero structure of submatrix B cannot change then we know that 925 no processor disassembled thus we can skip this stuff 926 */ 927 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 928 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 929 if (mat->was_assembled && !other_disassembled) { 930 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 931 } 932 } 933 934 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 935 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 936 } 937 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 938 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 939 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 940 941 #if defined(PETSC_USE_INFO) 942 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 943 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 944 baij->ht_total_ct = 0; 945 baij->ht_insert_ct = 0; 946 } 947 #endif 948 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 949 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 950 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 951 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 952 } 953 954 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 955 baij->rowvalues = 0; 956 PetscFunctionReturn(0); 957 } 958 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 961 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 962 { 963 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 964 PetscErrorCode ierr; 965 PetscMPIInt size = baij->size,rank = baij->rank; 966 PetscInt bs = mat->rmap->bs; 967 PetscTruth iascii,isdraw; 968 PetscViewer sviewer; 969 PetscViewerFormat format; 970 971 PetscFunctionBegin; 972 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 973 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 974 if (iascii) { 975 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 976 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 977 MatInfo info; 978 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 979 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 980 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 981 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 982 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 983 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 984 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 985 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 986 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 987 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 988 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 989 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } else if (format == PETSC_VIEWER_ASCII_INFO) { 992 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 995 PetscFunctionReturn(0); 996 } 997 } 998 999 if (isdraw) { 1000 PetscDraw draw; 1001 PetscTruth isnull; 1002 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1003 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1004 } 1005 1006 if (size == 1) { 1007 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1008 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1009 } else { 1010 /* assemble the entire matrix onto first processor. */ 1011 Mat A; 1012 Mat_SeqBAIJ *Aloc; 1013 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1014 MatScalar *a; 1015 1016 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1017 /* Perhaps this should be the type of mat? */ 1018 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1019 if (!rank) { 1020 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1021 } else { 1022 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1023 } 1024 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1025 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1026 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1027 1028 /* copy over the A part */ 1029 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1030 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1031 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1032 1033 for (i=0; i<mbs; i++) { 1034 rvals[0] = bs*(baij->rstartbs + i); 1035 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1036 for (j=ai[i]; j<ai[i+1]; j++) { 1037 col = (baij->cstartbs+aj[j])*bs; 1038 for (k=0; k<bs; k++) { 1039 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1040 col++; a += bs; 1041 } 1042 } 1043 } 1044 /* copy over the B part */ 1045 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1046 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1047 for (i=0; i<mbs; i++) { 1048 rvals[0] = bs*(baij->rstartbs + i); 1049 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1050 for (j=ai[i]; j<ai[i+1]; j++) { 1051 col = baij->garray[aj[j]]*bs; 1052 for (k=0; k<bs; k++) { 1053 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1054 col++; a += bs; 1055 } 1056 } 1057 } 1058 ierr = PetscFree(rvals);CHKERRQ(ierr); 1059 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1060 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1061 /* 1062 Everyone has to call to draw the matrix since the graphics waits are 1063 synchronized across all processors that share the PetscDraw object 1064 */ 1065 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1066 if (!rank) { 1067 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1068 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1069 } 1070 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1071 ierr = MatDestroy(A);CHKERRQ(ierr); 1072 } 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatView_MPIBAIJ" 1078 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1079 { 1080 PetscErrorCode ierr; 1081 PetscTruth iascii,isdraw,issocket,isbinary; 1082 1083 PetscFunctionBegin; 1084 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1085 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1086 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1087 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1088 if (iascii || isdraw || issocket || isbinary) { 1089 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1090 } else { 1091 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1098 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1099 { 1100 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 #if defined(PETSC_USE_LOG) 1105 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1106 #endif 1107 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1108 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1109 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1110 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1111 #if defined (PETSC_USE_CTABLE) 1112 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1113 #else 1114 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1115 #endif 1116 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1117 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1118 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1119 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1120 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1121 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1122 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1123 ierr = PetscFree(baij);CHKERRQ(ierr); 1124 1125 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatMult_MPIBAIJ" 1138 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1139 { 1140 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1141 PetscErrorCode ierr; 1142 PetscInt nt; 1143 1144 PetscFunctionBegin; 1145 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1146 if (nt != A->cmap->n) { 1147 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1148 } 1149 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1150 if (nt != A->rmap->n) { 1151 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1152 } 1153 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1154 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1155 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1156 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1162 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1163 { 1164 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1165 PetscErrorCode ierr; 1166 1167 PetscFunctionBegin; 1168 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1170 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1171 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1177 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1178 { 1179 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1180 PetscErrorCode ierr; 1181 PetscTruth merged; 1182 1183 PetscFunctionBegin; 1184 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1185 /* do nondiagonal part */ 1186 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1187 if (!merged) { 1188 /* send it on its way */ 1189 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1190 /* do local part */ 1191 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1192 /* receive remote parts: note this assumes the values are not actually */ 1193 /* inserted in yy until the next line */ 1194 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1195 } else { 1196 /* do local part */ 1197 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1198 /* send it on its way */ 1199 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1200 /* values actually were received in the Begin() but we need to call this nop */ 1201 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1202 } 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1208 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1209 { 1210 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1211 PetscErrorCode ierr; 1212 1213 PetscFunctionBegin; 1214 /* do nondiagonal part */ 1215 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1216 /* send it on its way */ 1217 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1218 /* do local part */ 1219 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1220 /* receive remote parts: note this assumes the values are not actually */ 1221 /* inserted in yy until the next line, which is true for my implementation*/ 1222 /* but is not perhaps always true. */ 1223 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 /* 1228 This only works correctly for square matrices where the subblock A->A is the 1229 diagonal block 1230 */ 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1233 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1234 { 1235 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1240 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatScale_MPIBAIJ" 1246 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1247 { 1248 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1253 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1259 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1260 { 1261 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1262 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1263 PetscErrorCode ierr; 1264 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1265 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1266 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1267 1268 PetscFunctionBegin; 1269 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1270 mat->getrowactive = PETSC_TRUE; 1271 1272 if (!mat->rowvalues && (idx || v)) { 1273 /* 1274 allocate enough space to hold information from the longest row. 1275 */ 1276 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1277 PetscInt max = 1,mbs = mat->mbs,tmp; 1278 for (i=0; i<mbs; i++) { 1279 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1280 if (max < tmp) { max = tmp; } 1281 } 1282 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1283 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1284 } 1285 1286 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1287 lrow = row - brstart; 1288 1289 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1290 if (!v) {pvA = 0; pvB = 0;} 1291 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1292 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1293 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1294 nztot = nzA + nzB; 1295 1296 cmap = mat->garray; 1297 if (v || idx) { 1298 if (nztot) { 1299 /* Sort by increasing column numbers, assuming A and B already sorted */ 1300 PetscInt imark = -1; 1301 if (v) { 1302 *v = v_p = mat->rowvalues; 1303 for (i=0; i<nzB; i++) { 1304 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1305 else break; 1306 } 1307 imark = i; 1308 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1309 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1310 } 1311 if (idx) { 1312 *idx = idx_p = mat->rowindices; 1313 if (imark > -1) { 1314 for (i=0; i<imark; i++) { 1315 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1316 } 1317 } else { 1318 for (i=0; i<nzB; i++) { 1319 if (cmap[cworkB[i]/bs] < cstart) 1320 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1321 else break; 1322 } 1323 imark = i; 1324 } 1325 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1326 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1327 } 1328 } else { 1329 if (idx) *idx = 0; 1330 if (v) *v = 0; 1331 } 1332 } 1333 *nz = nztot; 1334 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1341 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1344 1345 PetscFunctionBegin; 1346 if (!baij->getrowactive) { 1347 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1348 } 1349 baij->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1355 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1356 { 1357 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1362 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1368 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1369 { 1370 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1371 Mat A = a->A,B = a->B; 1372 PetscErrorCode ierr; 1373 PetscReal isend[5],irecv[5]; 1374 1375 PetscFunctionBegin; 1376 info->block_size = (PetscReal)matin->rmap->bs; 1377 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1378 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1379 isend[3] = info->memory; isend[4] = info->mallocs; 1380 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1381 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1382 isend[3] += info->memory; isend[4] += info->mallocs; 1383 if (flag == MAT_LOCAL) { 1384 info->nz_used = isend[0]; 1385 info->nz_allocated = isend[1]; 1386 info->nz_unneeded = isend[2]; 1387 info->memory = isend[3]; 1388 info->mallocs = isend[4]; 1389 } else if (flag == MAT_GLOBAL_MAX) { 1390 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1391 info->nz_used = irecv[0]; 1392 info->nz_allocated = irecv[1]; 1393 info->nz_unneeded = irecv[2]; 1394 info->memory = irecv[3]; 1395 info->mallocs = irecv[4]; 1396 } else if (flag == MAT_GLOBAL_SUM) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } else { 1404 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1405 } 1406 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1407 info->fill_ratio_needed = 0; 1408 info->factor_mallocs = 0; 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1414 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1415 { 1416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 switch (op) { 1421 case MAT_NEW_NONZERO_LOCATIONS: 1422 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1423 case MAT_KEEP_ZEROED_ROWS: 1424 case MAT_NEW_NONZERO_LOCATION_ERR: 1425 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1426 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1427 break; 1428 case MAT_ROW_ORIENTED: 1429 a->roworiented = flg; 1430 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1431 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1432 break; 1433 case MAT_NEW_DIAGONALS: 1434 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1435 break; 1436 case MAT_IGNORE_OFF_PROC_ENTRIES: 1437 a->donotstash = flg; 1438 break; 1439 case MAT_USE_HASH_TABLE: 1440 a->ht_flag = flg; 1441 break; 1442 case MAT_SYMMETRIC: 1443 case MAT_STRUCTURALLY_SYMMETRIC: 1444 case MAT_HERMITIAN: 1445 case MAT_SYMMETRY_ETERNAL: 1446 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1447 break; 1448 default: 1449 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1450 } 1451 PetscFunctionReturn(0); 1452 } 1453 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1456 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1457 { 1458 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1459 Mat_SeqBAIJ *Aloc; 1460 Mat B; 1461 PetscErrorCode ierr; 1462 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1463 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1464 MatScalar *a; 1465 1466 PetscFunctionBegin; 1467 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1468 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1469 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1470 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1471 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1472 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1473 } else { 1474 B = *matout; 1475 } 1476 1477 /* copy over the A part */ 1478 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1479 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1480 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1481 1482 for (i=0; i<mbs; i++) { 1483 rvals[0] = bs*(baij->rstartbs + i); 1484 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1485 for (j=ai[i]; j<ai[i+1]; j++) { 1486 col = (baij->cstartbs+aj[j])*bs; 1487 for (k=0; k<bs; k++) { 1488 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1489 col++; a += bs; 1490 } 1491 } 1492 } 1493 /* copy over the B part */ 1494 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1495 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1496 for (i=0; i<mbs; i++) { 1497 rvals[0] = bs*(baij->rstartbs + i); 1498 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1499 for (j=ai[i]; j<ai[i+1]; j++) { 1500 col = baij->garray[aj[j]]*bs; 1501 for (k=0; k<bs; k++) { 1502 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1503 col++; a += bs; 1504 } 1505 } 1506 } 1507 ierr = PetscFree(rvals);CHKERRQ(ierr); 1508 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1509 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1510 1511 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1512 *matout = B; 1513 } else { 1514 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1515 } 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1521 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1522 { 1523 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1524 Mat a = baij->A,b = baij->B; 1525 PetscErrorCode ierr; 1526 PetscInt s1,s2,s3; 1527 1528 PetscFunctionBegin; 1529 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1530 if (rr) { 1531 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1532 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1533 /* Overlap communication with computation. */ 1534 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 } 1536 if (ll) { 1537 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1538 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1539 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1540 } 1541 /* scale the diagonal block */ 1542 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1543 1544 if (rr) { 1545 /* Do a scatter end and then right scale the off-diagonal block */ 1546 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1548 } 1549 1550 PetscFunctionReturn(0); 1551 } 1552 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1555 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1556 { 1557 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1558 PetscErrorCode ierr; 1559 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1560 PetscInt i,*owners = A->rmap->range; 1561 PetscInt *nprocs,j,idx,nsends,row; 1562 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1563 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1564 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1565 MPI_Comm comm = ((PetscObject)A)->comm; 1566 MPI_Request *send_waits,*recv_waits; 1567 MPI_Status recv_status,*send_status; 1568 #if defined(PETSC_DEBUG) 1569 PetscTruth found = PETSC_FALSE; 1570 #endif 1571 1572 PetscFunctionBegin; 1573 /* first count number of contributors to each processor */ 1574 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1575 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1576 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1577 j = 0; 1578 for (i=0; i<N; i++) { 1579 if (lastidx > (idx = rows[i])) j = 0; 1580 lastidx = idx; 1581 for (; j<size; j++) { 1582 if (idx >= owners[j] && idx < owners[j+1]) { 1583 nprocs[2*j]++; 1584 nprocs[2*j+1] = 1; 1585 owner[i] = j; 1586 #if defined(PETSC_DEBUG) 1587 found = PETSC_TRUE; 1588 #endif 1589 break; 1590 } 1591 } 1592 #if defined(PETSC_DEBUG) 1593 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1594 found = PETSC_FALSE; 1595 #endif 1596 } 1597 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1598 1599 /* inform other processors of number of messages and max length*/ 1600 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1601 1602 /* post receives: */ 1603 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1604 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1605 for (i=0; i<nrecvs; i++) { 1606 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1607 } 1608 1609 /* do sends: 1610 1) starts[i] gives the starting index in svalues for stuff going to 1611 the ith processor 1612 */ 1613 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1614 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1615 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1616 starts[0] = 0; 1617 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1618 for (i=0; i<N; i++) { 1619 svalues[starts[owner[i]]++] = rows[i]; 1620 } 1621 1622 starts[0] = 0; 1623 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1624 count = 0; 1625 for (i=0; i<size; i++) { 1626 if (nprocs[2*i+1]) { 1627 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1628 } 1629 } 1630 ierr = PetscFree(starts);CHKERRQ(ierr); 1631 1632 base = owners[rank]; 1633 1634 /* wait on receives */ 1635 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1636 source = lens + nrecvs; 1637 count = nrecvs; slen = 0; 1638 while (count) { 1639 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1640 /* unpack receives into our local space */ 1641 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1642 source[imdex] = recv_status.MPI_SOURCE; 1643 lens[imdex] = n; 1644 slen += n; 1645 count--; 1646 } 1647 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1648 1649 /* move the data into the send scatter */ 1650 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1651 count = 0; 1652 for (i=0; i<nrecvs; i++) { 1653 values = rvalues + i*nmax; 1654 for (j=0; j<lens[i]; j++) { 1655 lrows[count++] = values[j] - base; 1656 } 1657 } 1658 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1659 ierr = PetscFree(lens);CHKERRQ(ierr); 1660 ierr = PetscFree(owner);CHKERRQ(ierr); 1661 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1662 1663 /* actually zap the local rows */ 1664 /* 1665 Zero the required rows. If the "diagonal block" of the matrix 1666 is square and the user wishes to set the diagonal we use separate 1667 code so that MatSetValues() is not called for each diagonal allocating 1668 new memory, thus calling lots of mallocs and slowing things down. 1669 1670 Contributed by: Matthew Knepley 1671 */ 1672 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1673 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1674 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1675 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1676 } else if (diag != 0.0) { 1677 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1678 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1679 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1680 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1681 } 1682 for (i=0; i<slen; i++) { 1683 row = lrows[i] + rstart_bs; 1684 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1685 } 1686 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 } else { 1689 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1690 } 1691 1692 ierr = PetscFree(lrows);CHKERRQ(ierr); 1693 1694 /* wait on sends */ 1695 if (nsends) { 1696 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1697 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1698 ierr = PetscFree(send_status);CHKERRQ(ierr); 1699 } 1700 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1701 ierr = PetscFree(svalues);CHKERRQ(ierr); 1702 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1708 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1709 { 1710 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1711 PetscErrorCode ierr; 1712 1713 PetscFunctionBegin; 1714 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1715 PetscFunctionReturn(0); 1716 } 1717 1718 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatEqual_MPIBAIJ" 1722 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1723 { 1724 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1725 Mat a,b,c,d; 1726 PetscTruth flg; 1727 PetscErrorCode ierr; 1728 1729 PetscFunctionBegin; 1730 a = matA->A; b = matA->B; 1731 c = matB->A; d = matB->B; 1732 1733 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1734 if (flg) { 1735 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1736 } 1737 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatCopy_MPIBAIJ" 1743 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1744 { 1745 PetscErrorCode ierr; 1746 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1747 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1748 1749 PetscFunctionBegin; 1750 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1751 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1752 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1753 } else { 1754 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1755 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1762 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1763 { 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #include "petscblaslapack.h" 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1774 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1775 { 1776 PetscErrorCode ierr; 1777 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1778 PetscBLASInt bnz,one=1; 1779 Mat_SeqBAIJ *x,*y; 1780 1781 PetscFunctionBegin; 1782 if (str == SAME_NONZERO_PATTERN) { 1783 PetscScalar alpha = a; 1784 x = (Mat_SeqBAIJ *)xx->A->data; 1785 y = (Mat_SeqBAIJ *)yy->A->data; 1786 bnz = PetscBLASIntCast(x->nz); 1787 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1788 x = (Mat_SeqBAIJ *)xx->B->data; 1789 y = (Mat_SeqBAIJ *)yy->B->data; 1790 bnz = PetscBLASIntCast(x->nz); 1791 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1792 } else { 1793 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1794 } 1795 PetscFunctionReturn(0); 1796 } 1797 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1800 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1801 { 1802 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1803 PetscErrorCode ierr; 1804 1805 PetscFunctionBegin; 1806 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1807 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1813 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1814 { 1815 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1816 PetscErrorCode ierr; 1817 1818 PetscFunctionBegin; 1819 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1820 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 /* -------------------------------------------------------------------*/ 1825 static struct _MatOps MatOps_Values = { 1826 MatSetValues_MPIBAIJ, 1827 MatGetRow_MPIBAIJ, 1828 MatRestoreRow_MPIBAIJ, 1829 MatMult_MPIBAIJ, 1830 /* 4*/ MatMultAdd_MPIBAIJ, 1831 MatMultTranspose_MPIBAIJ, 1832 MatMultTransposeAdd_MPIBAIJ, 1833 0, 1834 0, 1835 0, 1836 /*10*/ 0, 1837 0, 1838 0, 1839 0, 1840 MatTranspose_MPIBAIJ, 1841 /*15*/ MatGetInfo_MPIBAIJ, 1842 MatEqual_MPIBAIJ, 1843 MatGetDiagonal_MPIBAIJ, 1844 MatDiagonalScale_MPIBAIJ, 1845 MatNorm_MPIBAIJ, 1846 /*20*/ MatAssemblyBegin_MPIBAIJ, 1847 MatAssemblyEnd_MPIBAIJ, 1848 0, 1849 MatSetOption_MPIBAIJ, 1850 MatZeroEntries_MPIBAIJ, 1851 /*25*/ MatZeroRows_MPIBAIJ, 1852 0, 1853 0, 1854 0, 1855 0, 1856 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1857 0, 1858 0, 1859 0, 1860 0, 1861 /*35*/ MatDuplicate_MPIBAIJ, 1862 0, 1863 0, 1864 0, 1865 0, 1866 /*40*/ MatAXPY_MPIBAIJ, 1867 MatGetSubMatrices_MPIBAIJ, 1868 MatIncreaseOverlap_MPIBAIJ, 1869 MatGetValues_MPIBAIJ, 1870 MatCopy_MPIBAIJ, 1871 /*45*/ 0, 1872 MatScale_MPIBAIJ, 1873 0, 1874 0, 1875 0, 1876 /*50*/ 0, 1877 0, 1878 0, 1879 0, 1880 0, 1881 /*55*/ 0, 1882 0, 1883 MatSetUnfactored_MPIBAIJ, 1884 0, 1885 MatSetValuesBlocked_MPIBAIJ, 1886 /*60*/ 0, 1887 MatDestroy_MPIBAIJ, 1888 MatView_MPIBAIJ, 1889 0, 1890 0, 1891 /*65*/ 0, 1892 0, 1893 0, 1894 0, 1895 0, 1896 /*70*/ MatGetRowMaxAbs_MPIBAIJ, 1897 0, 1898 0, 1899 0, 1900 0, 1901 /*75*/ 0, 1902 0, 1903 0, 1904 0, 1905 0, 1906 /*80*/ 0, 1907 0, 1908 0, 1909 0, 1910 MatLoad_MPIBAIJ, 1911 /*85*/ 0, 1912 0, 1913 0, 1914 0, 1915 0, 1916 /*90*/ 0, 1917 0, 1918 0, 1919 0, 1920 0, 1921 /*95*/ 0, 1922 0, 1923 0, 1924 0, 1925 0, 1926 /*100*/0, 1927 0, 1928 0, 1929 0, 1930 0, 1931 /*105*/0, 1932 MatRealPart_MPIBAIJ, 1933 MatImaginaryPart_MPIBAIJ}; 1934 1935 1936 EXTERN_C_BEGIN 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1939 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1940 { 1941 PetscFunctionBegin; 1942 *a = ((Mat_MPIBAIJ *)A->data)->A; 1943 *iscopy = PETSC_FALSE; 1944 PetscFunctionReturn(0); 1945 } 1946 EXTERN_C_END 1947 1948 EXTERN_C_BEGIN 1949 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 1950 EXTERN_C_END 1951 1952 EXTERN_C_BEGIN 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 1955 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1956 { 1957 PetscInt m,rstart,cstart,cend; 1958 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1959 const PetscInt *JJ=0; 1960 PetscScalar *values=0; 1961 PetscErrorCode ierr; 1962 1963 PetscFunctionBegin; 1964 1965 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1966 B->rmap->bs = bs; 1967 B->cmap->bs = bs; 1968 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 1969 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 1970 m = B->rmap->n/bs; 1971 rstart = B->rmap->rstart/bs; 1972 cstart = B->cmap->rstart/bs; 1973 cend = B->cmap->rend/bs; 1974 1975 if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1976 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1977 o_nnz = d_nnz + m; 1978 for (i=0; i<m; i++) { 1979 nz = ii[i+1] - ii[i]; 1980 if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1981 nz_max = PetscMax(nz_max,nz); 1982 JJ = jj + ii[i]; 1983 for (j=0; j<nz; j++) { 1984 if (*JJ >= cstart) break; 1985 JJ++; 1986 } 1987 d = 0; 1988 for (; j<nz; j++) { 1989 if (*JJ++ >= cend) break; 1990 d++; 1991 } 1992 d_nnz[i] = d; 1993 o_nnz[i] = nz - d; 1994 } 1995 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1996 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1997 1998 values = (PetscScalar*)V; 1999 if (!values) { 2000 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2001 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2002 } 2003 for (i=0; i<m; i++) { 2004 PetscInt row = i + rstart; 2005 PetscInt ncols = ii[i+1] - ii[i]; 2006 const PetscInt *icols = jj + ii[i]; 2007 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2008 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2009 } 2010 2011 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2012 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2013 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2014 2015 PetscFunctionReturn(0); 2016 } 2017 EXTERN_C_END 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2021 /*@C 2022 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2023 (the default parallel PETSc format). 2024 2025 Collective on MPI_Comm 2026 2027 Input Parameters: 2028 + A - the matrix 2029 . i - the indices into j for the start of each local row (starts with zero) 2030 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2031 - v - optional values in the matrix 2032 2033 Level: developer 2034 2035 .keywords: matrix, aij, compressed row, sparse, parallel 2036 2037 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2038 @*/ 2039 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2040 { 2041 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2042 2043 PetscFunctionBegin; 2044 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2045 if (f) { 2046 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2047 } 2048 PetscFunctionReturn(0); 2049 } 2050 2051 EXTERN_C_BEGIN 2052 #undef __FUNCT__ 2053 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2054 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2055 { 2056 Mat_MPIBAIJ *b; 2057 PetscErrorCode ierr; 2058 PetscInt i, newbs = PetscAbs(bs); 2059 2060 PetscFunctionBegin; 2061 B->preallocated = PETSC_TRUE; 2062 if (bs < 0) { 2063 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2064 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2065 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2066 bs = PetscAbs(bs); 2067 } 2068 if ((d_nnz || o_nnz) && newbs != bs) { 2069 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2070 } 2071 bs = newbs; 2072 2073 2074 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2075 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2076 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2077 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2078 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2079 2080 B->rmap->bs = bs; 2081 B->cmap->bs = bs; 2082 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2083 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2084 2085 if (d_nnz) { 2086 for (i=0; i<B->rmap->n/bs; i++) { 2087 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2088 } 2089 } 2090 if (o_nnz) { 2091 for (i=0; i<B->rmap->n/bs; i++) { 2092 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2093 } 2094 } 2095 2096 b = (Mat_MPIBAIJ*)B->data; 2097 b->bs2 = bs*bs; 2098 b->mbs = B->rmap->n/bs; 2099 b->nbs = B->cmap->n/bs; 2100 b->Mbs = B->rmap->N/bs; 2101 b->Nbs = B->cmap->N/bs; 2102 2103 for (i=0; i<=b->size; i++) { 2104 b->rangebs[i] = B->rmap->range[i]/bs; 2105 } 2106 b->rstartbs = B->rmap->rstart/bs; 2107 b->rendbs = B->rmap->rend/bs; 2108 b->cstartbs = B->cmap->rstart/bs; 2109 b->cendbs = B->cmap->rend/bs; 2110 2111 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2112 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2113 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2114 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2115 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2116 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2117 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2118 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2119 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2120 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2121 2122 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2123 2124 PetscFunctionReturn(0); 2125 } 2126 EXTERN_C_END 2127 2128 EXTERN_C_BEGIN 2129 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2130 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2131 EXTERN_C_END 2132 2133 /*MC 2134 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2135 2136 Options Database Keys: 2137 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2138 . -mat_block_size <bs> - set the blocksize used to store the matrix 2139 - -mat_use_hash_table <fact> 2140 2141 Level: beginner 2142 2143 .seealso: MatCreateMPIBAIJ 2144 M*/ 2145 2146 EXTERN_C_BEGIN 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatCreate_MPIBAIJ" 2149 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2150 { 2151 Mat_MPIBAIJ *b; 2152 PetscErrorCode ierr; 2153 PetscTruth flg; 2154 2155 PetscFunctionBegin; 2156 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2157 B->data = (void*)b; 2158 2159 2160 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2161 B->mapping = 0; 2162 B->assembled = PETSC_FALSE; 2163 2164 B->insertmode = NOT_SET_VALUES; 2165 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2166 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2167 2168 /* build local table of row and column ownerships */ 2169 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2170 2171 /* build cache for off array entries formed */ 2172 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2173 b->donotstash = PETSC_FALSE; 2174 b->colmap = PETSC_NULL; 2175 b->garray = PETSC_NULL; 2176 b->roworiented = PETSC_TRUE; 2177 2178 /* stuff used in block assembly */ 2179 b->barray = 0; 2180 2181 /* stuff used for matrix vector multiply */ 2182 b->lvec = 0; 2183 b->Mvctx = 0; 2184 2185 /* stuff for MatGetRow() */ 2186 b->rowindices = 0; 2187 b->rowvalues = 0; 2188 b->getrowactive = PETSC_FALSE; 2189 2190 /* hash table stuff */ 2191 b->ht = 0; 2192 b->hd = 0; 2193 b->ht_size = 0; 2194 b->ht_flag = PETSC_FALSE; 2195 b->ht_fact = 0; 2196 b->ht_total_ct = 0; 2197 b->ht_insert_ct = 0; 2198 2199 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2200 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2201 if (flg) { 2202 PetscReal fact = 1.39; 2203 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2204 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2205 if (fact <= 1.0) fact = 1.39; 2206 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2207 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2208 } 2209 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2210 2211 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2212 "MatStoreValues_MPIBAIJ", 2213 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2214 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2215 "MatRetrieveValues_MPIBAIJ", 2216 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2217 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2218 "MatGetDiagonalBlock_MPIBAIJ", 2219 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2220 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2221 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2222 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2223 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2224 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2225 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2226 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2227 "MatDiagonalScaleLocal_MPIBAIJ", 2228 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2229 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2230 "MatSetHashTableFactor_MPIBAIJ", 2231 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2232 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 EXTERN_C_END 2236 2237 /*MC 2238 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2239 2240 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2241 and MATMPIBAIJ otherwise. 2242 2243 Options Database Keys: 2244 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2245 2246 Level: beginner 2247 2248 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2249 M*/ 2250 2251 EXTERN_C_BEGIN 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatCreate_BAIJ" 2254 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2255 { 2256 PetscErrorCode ierr; 2257 PetscMPIInt size; 2258 2259 PetscFunctionBegin; 2260 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2261 if (size == 1) { 2262 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2263 } else { 2264 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2265 } 2266 PetscFunctionReturn(0); 2267 } 2268 EXTERN_C_END 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2272 /*@C 2273 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2274 (block compressed row). For good matrix assembly performance 2275 the user should preallocate the matrix storage by setting the parameters 2276 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2277 performance can be increased by more than a factor of 50. 2278 2279 Collective on Mat 2280 2281 Input Parameters: 2282 + A - the matrix 2283 . bs - size of blockk 2284 . d_nz - number of block nonzeros per block row in diagonal portion of local 2285 submatrix (same for all local rows) 2286 . d_nnz - array containing the number of block nonzeros in the various block rows 2287 of the in diagonal portion of the local (possibly different for each block 2288 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2289 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2290 submatrix (same for all local rows). 2291 - o_nnz - array containing the number of nonzeros in the various block rows of the 2292 off-diagonal portion of the local submatrix (possibly different for 2293 each block row) or PETSC_NULL. 2294 2295 If the *_nnz parameter is given then the *_nz parameter is ignored 2296 2297 Options Database Keys: 2298 + -mat_block_size - size of the blocks to use 2299 - -mat_use_hash_table <fact> 2300 2301 Notes: 2302 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2303 than it must be used on all processors that share the object for that argument. 2304 2305 Storage Information: 2306 For a square global matrix we define each processor's diagonal portion 2307 to be its local rows and the corresponding columns (a square submatrix); 2308 each processor's off-diagonal portion encompasses the remainder of the 2309 local matrix (a rectangular submatrix). 2310 2311 The user can specify preallocated storage for the diagonal part of 2312 the local submatrix with either d_nz or d_nnz (not both). Set 2313 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2314 memory allocation. Likewise, specify preallocated storage for the 2315 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2316 2317 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2318 the figure below we depict these three local rows and all columns (0-11). 2319 2320 .vb 2321 0 1 2 3 4 5 6 7 8 9 10 11 2322 ------------------- 2323 row 3 | o o o d d d o o o o o o 2324 row 4 | o o o d d d o o o o o o 2325 row 5 | o o o d d d o o o o o o 2326 ------------------- 2327 .ve 2328 2329 Thus, any entries in the d locations are stored in the d (diagonal) 2330 submatrix, and any entries in the o locations are stored in the 2331 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2332 stored simply in the MATSEQBAIJ format for compressed row storage. 2333 2334 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2335 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2336 In general, for PDE problems in which most nonzeros are near the diagonal, 2337 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2338 or you will get TERRIBLE performance; see the users' manual chapter on 2339 matrices. 2340 2341 You can call MatGetInfo() to get information on how effective the preallocation was; 2342 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2343 You can also run with the option -info and look for messages with the string 2344 malloc in them to see if additional memory allocation was needed. 2345 2346 Level: intermediate 2347 2348 .keywords: matrix, block, aij, compressed row, sparse, parallel 2349 2350 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2351 @*/ 2352 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2353 { 2354 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2355 2356 PetscFunctionBegin; 2357 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2358 if (f) { 2359 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2360 } 2361 PetscFunctionReturn(0); 2362 } 2363 2364 #undef __FUNCT__ 2365 #define __FUNCT__ "MatCreateMPIBAIJ" 2366 /*@C 2367 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2368 (block compressed row). For good matrix assembly performance 2369 the user should preallocate the matrix storage by setting the parameters 2370 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2371 performance can be increased by more than a factor of 50. 2372 2373 Collective on MPI_Comm 2374 2375 Input Parameters: 2376 + comm - MPI communicator 2377 . bs - size of blockk 2378 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2379 This value should be the same as the local size used in creating the 2380 y vector for the matrix-vector product y = Ax. 2381 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2382 This value should be the same as the local size used in creating the 2383 x vector for the matrix-vector product y = Ax. 2384 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2385 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2386 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2387 submatrix (same for all local rows) 2388 . d_nnz - array containing the number of nonzero blocks in the various block rows 2389 of the in diagonal portion of the local (possibly different for each block 2390 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2391 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2392 submatrix (same for all local rows). 2393 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2394 off-diagonal portion of the local submatrix (possibly different for 2395 each block row) or PETSC_NULL. 2396 2397 Output Parameter: 2398 . A - the matrix 2399 2400 Options Database Keys: 2401 + -mat_block_size - size of the blocks to use 2402 - -mat_use_hash_table <fact> 2403 2404 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2405 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 2406 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 2407 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2408 2409 Notes: 2410 If the *_nnz parameter is given then the *_nz parameter is ignored 2411 2412 A nonzero block is any block that as 1 or more nonzeros in it 2413 2414 The user MUST specify either the local or global matrix dimensions 2415 (possibly both). 2416 2417 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2418 than it must be used on all processors that share the object for that argument. 2419 2420 Storage Information: 2421 For a square global matrix we define each processor's diagonal portion 2422 to be its local rows and the corresponding columns (a square submatrix); 2423 each processor's off-diagonal portion encompasses the remainder of the 2424 local matrix (a rectangular submatrix). 2425 2426 The user can specify preallocated storage for the diagonal part of 2427 the local submatrix with either d_nz or d_nnz (not both). Set 2428 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2429 memory allocation. Likewise, specify preallocated storage for the 2430 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2431 2432 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2433 the figure below we depict these three local rows and all columns (0-11). 2434 2435 .vb 2436 0 1 2 3 4 5 6 7 8 9 10 11 2437 ------------------- 2438 row 3 | o o o d d d o o o o o o 2439 row 4 | o o o d d d o o o o o o 2440 row 5 | o o o d d d o o o o o o 2441 ------------------- 2442 .ve 2443 2444 Thus, any entries in the d locations are stored in the d (diagonal) 2445 submatrix, and any entries in the o locations are stored in the 2446 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2447 stored simply in the MATSEQBAIJ format for compressed row storage. 2448 2449 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2450 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2451 In general, for PDE problems in which most nonzeros are near the diagonal, 2452 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2453 or you will get TERRIBLE performance; see the users' manual chapter on 2454 matrices. 2455 2456 Level: intermediate 2457 2458 .keywords: matrix, block, aij, compressed row, sparse, parallel 2459 2460 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2461 @*/ 2462 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2463 { 2464 PetscErrorCode ierr; 2465 PetscMPIInt size; 2466 2467 PetscFunctionBegin; 2468 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2469 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2470 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2471 if (size > 1) { 2472 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2473 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2474 } else { 2475 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2476 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2477 } 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2483 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2484 { 2485 Mat mat; 2486 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2487 PetscErrorCode ierr; 2488 PetscInt len=0; 2489 2490 PetscFunctionBegin; 2491 *newmat = 0; 2492 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2493 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2494 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2495 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2496 2497 mat->factor = matin->factor; 2498 mat->preallocated = PETSC_TRUE; 2499 mat->assembled = PETSC_TRUE; 2500 mat->insertmode = NOT_SET_VALUES; 2501 2502 a = (Mat_MPIBAIJ*)mat->data; 2503 mat->rmap->bs = matin->rmap->bs; 2504 a->bs2 = oldmat->bs2; 2505 a->mbs = oldmat->mbs; 2506 a->nbs = oldmat->nbs; 2507 a->Mbs = oldmat->Mbs; 2508 a->Nbs = oldmat->Nbs; 2509 2510 ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 2511 ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 2512 2513 a->size = oldmat->size; 2514 a->rank = oldmat->rank; 2515 a->donotstash = oldmat->donotstash; 2516 a->roworiented = oldmat->roworiented; 2517 a->rowindices = 0; 2518 a->rowvalues = 0; 2519 a->getrowactive = PETSC_FALSE; 2520 a->barray = 0; 2521 a->rstartbs = oldmat->rstartbs; 2522 a->rendbs = oldmat->rendbs; 2523 a->cstartbs = oldmat->cstartbs; 2524 a->cendbs = oldmat->cendbs; 2525 2526 /* hash table stuff */ 2527 a->ht = 0; 2528 a->hd = 0; 2529 a->ht_size = 0; 2530 a->ht_flag = oldmat->ht_flag; 2531 a->ht_fact = oldmat->ht_fact; 2532 a->ht_total_ct = 0; 2533 a->ht_insert_ct = 0; 2534 2535 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2536 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2537 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2538 if (oldmat->colmap) { 2539 #if defined (PETSC_USE_CTABLE) 2540 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2541 #else 2542 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2543 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2544 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2545 #endif 2546 } else a->colmap = 0; 2547 2548 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2549 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2550 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2551 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2552 } else a->garray = 0; 2553 2554 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2555 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2556 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2557 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2558 2559 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2560 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2561 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2562 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2563 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2564 *newmat = mat; 2565 2566 PetscFunctionReturn(0); 2567 } 2568 2569 #include "petscsys.h" 2570 2571 #undef __FUNCT__ 2572 #define __FUNCT__ "MatLoad_MPIBAIJ" 2573 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 2574 { 2575 Mat A; 2576 PetscErrorCode ierr; 2577 int fd; 2578 PetscInt i,nz,j,rstart,rend; 2579 PetscScalar *vals,*buf; 2580 MPI_Comm comm = ((PetscObject)viewer)->comm; 2581 MPI_Status status; 2582 PetscMPIInt rank,size,maxnz; 2583 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2584 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2585 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2586 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2587 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2588 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2589 2590 PetscFunctionBegin; 2591 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 2592 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2593 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2594 2595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2596 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2597 if (!rank) { 2598 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2599 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2600 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2601 } 2602 2603 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2604 M = header[1]; N = header[2]; 2605 2606 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2607 2608 /* 2609 This code adds extra rows to make sure the number of rows is 2610 divisible by the blocksize 2611 */ 2612 Mbs = M/bs; 2613 extra_rows = bs - M + bs*Mbs; 2614 if (extra_rows == bs) extra_rows = 0; 2615 else Mbs++; 2616 if (extra_rows && !rank) { 2617 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2618 } 2619 2620 /* determine ownership of all rows */ 2621 mbs = Mbs/size + ((Mbs % size) > rank); 2622 m = mbs*bs; 2623 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2624 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2625 2626 /* process 0 needs enough room for process with most rows */ 2627 if (!rank) { 2628 mmax = rowners[1]; 2629 for (i=2; i<size; i++) { 2630 mmax = PetscMax(mmax,rowners[i]); 2631 } 2632 mmax*=bs; 2633 } else mmax = m; 2634 2635 rowners[0] = 0; 2636 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2637 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2638 rstart = rowners[rank]; 2639 rend = rowners[rank+1]; 2640 2641 /* distribute row lengths to all processors */ 2642 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2643 if (!rank) { 2644 mend = m; 2645 if (size == 1) mend = mend - extra_rows; 2646 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2647 for (j=mend; j<m; j++) locrowlens[j] = 1; 2648 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2649 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2650 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2651 for (j=0; j<m; j++) { 2652 procsnz[0] += locrowlens[j]; 2653 } 2654 for (i=1; i<size; i++) { 2655 mend = browners[i+1] - browners[i]; 2656 if (i == size-1) mend = mend - extra_rows; 2657 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2658 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2659 /* calculate the number of nonzeros on each processor */ 2660 for (j=0; j<browners[i+1]-browners[i]; j++) { 2661 procsnz[i] += rowlengths[j]; 2662 } 2663 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2664 } 2665 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2666 } else { 2667 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2668 } 2669 2670 if (!rank) { 2671 /* determine max buffer needed and allocate it */ 2672 maxnz = procsnz[0]; 2673 for (i=1; i<size; i++) { 2674 maxnz = PetscMax(maxnz,procsnz[i]); 2675 } 2676 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2677 2678 /* read in my part of the matrix column indices */ 2679 nz = procsnz[0]; 2680 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2681 mycols = ibuf; 2682 if (size == 1) nz -= extra_rows; 2683 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2684 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2685 2686 /* read in every ones (except the last) and ship off */ 2687 for (i=1; i<size-1; i++) { 2688 nz = procsnz[i]; 2689 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2690 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2691 } 2692 /* read in the stuff for the last proc */ 2693 if (size != 1) { 2694 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2695 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2696 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2697 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2698 } 2699 ierr = PetscFree(cols);CHKERRQ(ierr); 2700 } else { 2701 /* determine buffer space needed for message */ 2702 nz = 0; 2703 for (i=0; i<m; i++) { 2704 nz += locrowlens[i]; 2705 } 2706 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2707 mycols = ibuf; 2708 /* receive message of column indices*/ 2709 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2710 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2711 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2712 } 2713 2714 /* loop over local rows, determining number of off diagonal entries */ 2715 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2716 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2717 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2718 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2719 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2720 rowcount = 0; nzcount = 0; 2721 for (i=0; i<mbs; i++) { 2722 dcount = 0; 2723 odcount = 0; 2724 for (j=0; j<bs; j++) { 2725 kmax = locrowlens[rowcount]; 2726 for (k=0; k<kmax; k++) { 2727 tmp = mycols[nzcount++]/bs; 2728 if (!mask[tmp]) { 2729 mask[tmp] = 1; 2730 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2731 else masked1[dcount++] = tmp; 2732 } 2733 } 2734 rowcount++; 2735 } 2736 2737 dlens[i] = dcount; 2738 odlens[i] = odcount; 2739 2740 /* zero out the mask elements we set */ 2741 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2742 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2743 } 2744 2745 /* create our matrix */ 2746 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2747 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2748 ierr = MatSetType(A,type);CHKERRQ(ierr) 2749 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2750 2751 if (!rank) { 2752 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2753 /* read in my part of the matrix numerical values */ 2754 nz = procsnz[0]; 2755 vals = buf; 2756 mycols = ibuf; 2757 if (size == 1) nz -= extra_rows; 2758 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2759 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2760 2761 /* insert into matrix */ 2762 jj = rstart*bs; 2763 for (i=0; i<m; i++) { 2764 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2765 mycols += locrowlens[i]; 2766 vals += locrowlens[i]; 2767 jj++; 2768 } 2769 /* read in other processors (except the last one) and ship out */ 2770 for (i=1; i<size-1; i++) { 2771 nz = procsnz[i]; 2772 vals = buf; 2773 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2774 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2775 } 2776 /* the last proc */ 2777 if (size != 1){ 2778 nz = procsnz[i] - extra_rows; 2779 vals = buf; 2780 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2781 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2782 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2783 } 2784 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2785 } else { 2786 /* receive numeric values */ 2787 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2788 2789 /* receive message of values*/ 2790 vals = buf; 2791 mycols = ibuf; 2792 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2793 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2794 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2795 2796 /* insert into matrix */ 2797 jj = rstart*bs; 2798 for (i=0; i<m; i++) { 2799 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2800 mycols += locrowlens[i]; 2801 vals += locrowlens[i]; 2802 jj++; 2803 } 2804 } 2805 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2806 ierr = PetscFree(buf);CHKERRQ(ierr); 2807 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2808 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2809 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2810 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2811 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2813 2814 *newmat = A; 2815 PetscFunctionReturn(0); 2816 } 2817 2818 #undef __FUNCT__ 2819 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2820 /*@ 2821 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2822 2823 Input Parameters: 2824 . mat - the matrix 2825 . fact - factor 2826 2827 Collective on Mat 2828 2829 Level: advanced 2830 2831 Notes: 2832 This can also be set by the command line option: -mat_use_hash_table <fact> 2833 2834 .keywords: matrix, hashtable, factor, HT 2835 2836 .seealso: MatSetOption() 2837 @*/ 2838 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2839 { 2840 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2841 2842 PetscFunctionBegin; 2843 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2844 if (f) { 2845 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2846 } 2847 PetscFunctionReturn(0); 2848 } 2849 2850 EXTERN_C_BEGIN 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2853 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2854 { 2855 Mat_MPIBAIJ *baij; 2856 2857 PetscFunctionBegin; 2858 baij = (Mat_MPIBAIJ*)mat->data; 2859 baij->ht_fact = fact; 2860 PetscFunctionReturn(0); 2861 } 2862 EXTERN_C_END 2863 2864 #undef __FUNCT__ 2865 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2866 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2867 { 2868 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2869 PetscFunctionBegin; 2870 *Ad = a->A; 2871 *Ao = a->B; 2872 *colmap = a->garray; 2873 PetscFunctionReturn(0); 2874 } 2875 2876 /* 2877 Special version for direct calls from Fortran (to eliminate two function call overheads 2878 */ 2879 #if defined(PETSC_HAVE_FORTRAN_CAPS) 2880 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 2881 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 2882 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 2883 #endif 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "matmpibiajsetvaluesblocked" 2887 /*@C 2888 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 2889 2890 Collective on Mat 2891 2892 Input Parameters: 2893 + mat - the matrix 2894 . min - number of input rows 2895 . im - input rows 2896 . nin - number of input columns 2897 . in - input columns 2898 . v - numerical values input 2899 - addvin - INSERT_VALUES or ADD_VALUES 2900 2901 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 2902 2903 Level: advanced 2904 2905 .seealso: MatSetValuesBlocked() 2906 @*/ 2907 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 2908 { 2909 /* convert input arguments to C version */ 2910 Mat mat = *matin; 2911 PetscInt m = *min, n = *nin; 2912 InsertMode addv = *addvin; 2913 2914 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2915 const MatScalar *value; 2916 MatScalar *barray=baij->barray; 2917 PetscTruth roworiented = baij->roworiented; 2918 PetscErrorCode ierr; 2919 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 2920 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 2921 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 2922 2923 PetscFunctionBegin; 2924 /* tasks normally handled by MatSetValuesBlocked() */ 2925 if (mat->insertmode == NOT_SET_VALUES) { 2926 mat->insertmode = addv; 2927 } 2928 #if defined(PETSC_USE_DEBUG) 2929 else if (mat->insertmode != addv) { 2930 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 2931 } 2932 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2933 #endif 2934 if (mat->assembled) { 2935 mat->was_assembled = PETSC_TRUE; 2936 mat->assembled = PETSC_FALSE; 2937 } 2938 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 2939 2940 2941 if(!barray) { 2942 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 2943 baij->barray = barray; 2944 } 2945 2946 if (roworiented) { 2947 stepval = (n-1)*bs; 2948 } else { 2949 stepval = (m-1)*bs; 2950 } 2951 for (i=0; i<m; i++) { 2952 if (im[i] < 0) continue; 2953 #if defined(PETSC_USE_DEBUG) 2954 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 2955 #endif 2956 if (im[i] >= rstart && im[i] < rend) { 2957 row = im[i] - rstart; 2958 for (j=0; j<n; j++) { 2959 /* If NumCol = 1 then a copy is not required */ 2960 if ((roworiented) && (n == 1)) { 2961 barray = (MatScalar*)v + i*bs2; 2962 } else if((!roworiented) && (m == 1)) { 2963 barray = (MatScalar*)v + j*bs2; 2964 } else { /* Here a copy is required */ 2965 if (roworiented) { 2966 value = v + i*(stepval+bs)*bs + j*bs; 2967 } else { 2968 value = v + j*(stepval+bs)*bs + i*bs; 2969 } 2970 for (ii=0; ii<bs; ii++,value+=stepval) { 2971 for (jj=0; jj<bs; jj++) { 2972 *barray++ = *value++; 2973 } 2974 } 2975 barray -=bs2; 2976 } 2977 2978 if (in[j] >= cstart && in[j] < cend){ 2979 col = in[j] - cstart; 2980 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 2981 } 2982 else if (in[j] < 0) continue; 2983 #if defined(PETSC_USE_DEBUG) 2984 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 2985 #endif 2986 else { 2987 if (mat->was_assembled) { 2988 if (!baij->colmap) { 2989 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2990 } 2991 2992 #if defined(PETSC_USE_DEBUG) 2993 #if defined (PETSC_USE_CTABLE) 2994 { PetscInt data; 2995 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 2996 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 2997 } 2998 #else 2999 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3000 #endif 3001 #endif 3002 #if defined (PETSC_USE_CTABLE) 3003 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3004 col = (col - 1)/bs; 3005 #else 3006 col = (baij->colmap[in[j]] - 1)/bs; 3007 #endif 3008 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3009 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3010 col = in[j]; 3011 } 3012 } 3013 else col = in[j]; 3014 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3015 } 3016 } 3017 } else { 3018 if (!baij->donotstash) { 3019 if (roworiented) { 3020 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3021 } else { 3022 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3023 } 3024 } 3025 } 3026 } 3027 3028 /* task normally handled by MatSetValuesBlocked() */ 3029 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3030 PetscFunctionReturn(0); 3031 } 3032