1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 19 { 20 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt i,*idxb = 0; 23 PetscScalar *va,*vb; 24 Vec vtmp; 25 26 PetscFunctionBegin; 27 ierr = PetscMemzero(idx,A->cmap.n*sizeof(PetscInt));CHKERRQ(ierr); 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap.n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scable way since the 84 length of colmap equals the global matrix length. 85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define CHUNKSIZE 10 111 112 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 113 { \ 114 \ 115 brow = row/bs; \ 116 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 117 rmax = aimax[brow]; nrow = ailen[brow]; \ 118 bcol = col/bs; \ 119 ridx = row % bs; cidx = col % bs; \ 120 low = 0; high = nrow; \ 121 while (high-low > 3) { \ 122 t = (low+high)/2; \ 123 if (rp[t] > bcol) high = t; \ 124 else low = t; \ 125 } \ 126 for (_i=low; _i<high; _i++) { \ 127 if (rp[_i] > bcol) break; \ 128 if (rp[_i] == bcol) { \ 129 bap = ap + bs2*_i + bs*cidx + ridx; \ 130 if (addv == ADD_VALUES) *bap += value; \ 131 else *bap = value; \ 132 goto a_noinsert; \ 133 } \ 134 } \ 135 if (a->nonew == 1) goto a_noinsert; \ 136 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 137 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 138 N = nrow++ - 1; \ 139 /* shift up all the later entries in this row */ \ 140 for (ii=N; ii>=_i; ii--) { \ 141 rp[ii+1] = rp[ii]; \ 142 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 143 } \ 144 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 145 rp[_i] = bcol; \ 146 ap[bs2*_i + bs*cidx + ridx] = value; \ 147 a_noinsert:; \ 148 ailen[brow] = nrow; \ 149 } 150 151 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 152 { \ 153 brow = row/bs; \ 154 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 155 rmax = bimax[brow]; nrow = bilen[brow]; \ 156 bcol = col/bs; \ 157 ridx = row % bs; cidx = col % bs; \ 158 low = 0; high = nrow; \ 159 while (high-low > 3) { \ 160 t = (low+high)/2; \ 161 if (rp[t] > bcol) high = t; \ 162 else low = t; \ 163 } \ 164 for (_i=low; _i<high; _i++) { \ 165 if (rp[_i] > bcol) break; \ 166 if (rp[_i] == bcol) { \ 167 bap = ap + bs2*_i + bs*cidx + ridx; \ 168 if (addv == ADD_VALUES) *bap += value; \ 169 else *bap = value; \ 170 goto b_noinsert; \ 171 } \ 172 } \ 173 if (b->nonew == 1) goto b_noinsert; \ 174 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 175 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 176 CHKMEMQ;\ 177 N = nrow++ - 1; \ 178 /* shift up all the later entries in this row */ \ 179 for (ii=N; ii>=_i; ii--) { \ 180 rp[ii+1] = rp[ii]; \ 181 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 182 } \ 183 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 184 rp[_i] = bcol; \ 185 ap[bs2*_i + bs*cidx + ridx] = value; \ 186 b_noinsert:; \ 187 bilen[brow] = nrow; \ 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatSetValues_MPIBAIJ" 192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 193 { 194 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 195 MatScalar value; 196 PetscTruth roworiented = baij->roworiented; 197 PetscErrorCode ierr; 198 PetscInt i,j,row,col; 199 PetscInt rstart_orig=mat->rmap.rstart; 200 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 201 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 202 203 /* Some Variables required in the macro */ 204 Mat A = baij->A; 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 206 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 207 MatScalar *aa=a->a; 208 209 Mat B = baij->B; 210 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 211 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 212 MatScalar *ba=b->a; 213 214 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 215 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 216 MatScalar *ap,*bap; 217 218 PetscFunctionBegin; 219 for (i=0; i<m; i++) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_DEBUG) 222 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 223 #endif 224 if (im[i] >= rstart_orig && im[i] < rend_orig) { 225 row = im[i] - rstart_orig; 226 for (j=0; j<n; j++) { 227 if (in[j] >= cstart_orig && in[j] < cend_orig){ 228 col = in[j] - cstart_orig; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_DEBUG) 234 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!baij->colmap) { 239 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 243 col = col - 1; 244 #else 245 col = baij->colmap[in[j]/bs] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 248 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 251 B = baij->B; 252 b = (Mat_SeqBAIJ*)(B)->data; 253 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 254 ba=b->a; 255 } else col += in[j]%bs; 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 259 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 260 } 261 } 262 } else { 263 if (!baij->donotstash) { 264 if (roworiented) { 265 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 266 } else { 267 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 268 } 269 } 270 } 271 } 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 278 { 279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscTruth roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + i*(stepval+bs)*bs + j*bs; 315 } else { 316 value = v + j*(stepval+bs)*bs + i*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=stepval) { 319 for (jj=0; jj<bs; jj++) { 320 *barray++ = *value++; 321 } 322 } 323 barray -=bs2; 324 } 325 326 if (in[j] >= cstart && in[j] < cend){ 327 col = in[j] - cstart; 328 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 329 } 330 else if (in[j] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 333 #endif 334 else { 335 if (mat->was_assembled) { 336 if (!baij->colmap) { 337 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 338 } 339 340 #if defined(PETSC_USE_DEBUG) 341 #if defined (PETSC_USE_CTABLE) 342 { PetscInt data; 343 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 344 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 345 } 346 #else 347 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 348 #endif 349 #endif 350 #if defined (PETSC_USE_CTABLE) 351 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 352 col = (col - 1)/bs; 353 #else 354 col = (baij->colmap[in[j]] - 1)/bs; 355 #endif 356 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 357 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 358 col = in[j]; 359 } 360 } 361 else col = in[j]; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 } 365 } else { 366 if (!baij->donotstash) { 367 if (roworiented) { 368 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 369 } else { 370 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } 372 } 373 } 374 } 375 PetscFunctionReturn(0); 376 } 377 378 #define HASH_KEY 0.6180339887 379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 385 { 386 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 387 PetscTruth roworiented = baij->roworiented; 388 PetscErrorCode ierr; 389 PetscInt i,j,row,col; 390 PetscInt rstart_orig=mat->rmap.rstart; 391 PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 392 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 393 PetscReal tmp; 394 MatScalar **HD = baij->hd,value; 395 #if defined(PETSC_USE_DEBUG) 396 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 397 #endif 398 399 PetscFunctionBegin; 400 401 for (i=0; i<m; i++) { 402 #if defined(PETSC_USE_DEBUG) 403 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 404 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 405 #endif 406 row = im[i]; 407 if (row >= rstart_orig && row < rend_orig) { 408 for (j=0; j<n; j++) { 409 col = in[j]; 410 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 411 /* Look up PetscInto the Hash Table */ 412 key = (row/bs)*Nbs+(col/bs)+1; 413 h1 = HASH(size,key,tmp); 414 415 416 idx = h1; 417 #if defined(PETSC_USE_DEBUG) 418 insert_ct++; 419 total_ct++; 420 if (HT[idx] != key) { 421 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 422 if (idx == size) { 423 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 424 if (idx == h1) { 425 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 426 } 427 } 428 } 429 #else 430 if (HT[idx] != key) { 431 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 432 if (idx == size) { 433 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 434 if (idx == h1) { 435 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 436 } 437 } 438 } 439 #endif 440 /* A HASH table entry is found, so insert the values at the correct address */ 441 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 442 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 443 } 444 } else { 445 if (!baij->donotstash) { 446 if (roworiented) { 447 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 448 } else { 449 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 450 } 451 } 452 } 453 } 454 #if defined(PETSC_USE_DEBUG) 455 baij->ht_total_ct = total_ct; 456 baij->ht_insert_ct = insert_ct; 457 #endif 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 464 { 465 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 466 PetscTruth roworiented = baij->roworiented; 467 PetscErrorCode ierr; 468 PetscInt i,j,ii,jj,row,col; 469 PetscInt rstart=baij->rstartbs; 470 PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2; 471 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 472 PetscReal tmp; 473 MatScalar **HD = baij->hd,*baij_a; 474 const PetscScalar *v_t,*value; 475 #if defined(PETSC_USE_DEBUG) 476 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 477 #endif 478 479 PetscFunctionBegin; 480 481 if (roworiented) { 482 stepval = (n-1)*bs; 483 } else { 484 stepval = (m-1)*bs; 485 } 486 for (i=0; i<m; i++) { 487 #if defined(PETSC_USE_DEBUG) 488 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 489 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 row = im[i]; 492 v_t = v + i*nbs2; 493 if (row >= rstart && row < rend) { 494 for (j=0; j<n; j++) { 495 col = in[j]; 496 497 /* Look up into the Hash Table */ 498 key = row*Nbs+col+1; 499 h1 = HASH(size,key,tmp); 500 501 idx = h1; 502 #if defined(PETSC_USE_DEBUG) 503 total_ct++; 504 insert_ct++; 505 if (HT[idx] != key) { 506 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 507 if (idx == size) { 508 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == h1) { 510 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 511 } 512 } 513 } 514 #else 515 if (HT[idx] != key) { 516 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 517 if (idx == size) { 518 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 519 if (idx == h1) { 520 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 521 } 522 } 523 } 524 #endif 525 baij_a = HD[idx]; 526 if (roworiented) { 527 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 528 /* value = v + (i*(stepval+bs)+j)*bs; */ 529 value = v_t; 530 v_t += bs; 531 if (addv == ADD_VALUES) { 532 for (ii=0; ii<bs; ii++,value+=stepval) { 533 for (jj=ii; jj<bs2; jj+=bs) { 534 baij_a[jj] += *value++; 535 } 536 } 537 } else { 538 for (ii=0; ii<bs; ii++,value+=stepval) { 539 for (jj=ii; jj<bs2; jj+=bs) { 540 baij_a[jj] = *value++; 541 } 542 } 543 } 544 } else { 545 value = v + j*(stepval+bs)*bs + i*bs; 546 if (addv == ADD_VALUES) { 547 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 548 for (jj=0; jj<bs; jj++) { 549 baij_a[jj] += *value++; 550 } 551 } 552 } else { 553 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 554 for (jj=0; jj<bs; jj++) { 555 baij_a[jj] = *value++; 556 } 557 } 558 } 559 } 560 } 561 } else { 562 if (!baij->donotstash) { 563 if (roworiented) { 564 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 565 } else { 566 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } 568 } 569 } 570 } 571 #if defined(PETSC_USE_DEBUG) 572 baij->ht_total_ct = total_ct; 573 baij->ht_insert_ct = insert_ct; 574 #endif 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatGetValues_MPIBAIJ" 580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 581 { 582 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 583 PetscErrorCode ierr; 584 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 585 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 586 587 PetscFunctionBegin; 588 for (i=0; i<m; i++) { 589 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 590 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 591 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 592 row = idxm[i] - bsrstart; 593 for (j=0; j<n; j++) { 594 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 595 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 596 if (idxn[j] >= bscstart && idxn[j] < bscend){ 597 col = idxn[j] - bscstart; 598 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } else { 600 if (!baij->colmap) { 601 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 602 } 603 #if defined (PETSC_USE_CTABLE) 604 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 605 data --; 606 #else 607 data = baij->colmap[idxn[j]/bs]-1; 608 #endif 609 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 610 else { 611 col = data + idxn[j]%bs; 612 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 613 } 614 } 615 } 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatNorm_MPIBAIJ" 625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 626 { 627 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 628 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 629 PetscErrorCode ierr; 630 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 631 PetscReal sum = 0.0; 632 MatScalar *v; 633 634 PetscFunctionBegin; 635 if (baij->size == 1) { 636 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 637 } else { 638 if (type == NORM_FROBENIUS) { 639 v = amat->a; 640 nz = amat->nz*bs2; 641 for (i=0; i<nz; i++) { 642 #if defined(PETSC_USE_COMPLEX) 643 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 644 #else 645 sum += (*v)*(*v); v++; 646 #endif 647 } 648 v = bmat->a; 649 nz = bmat->nz*bs2; 650 for (i=0; i<nz; i++) { 651 #if defined(PETSC_USE_COMPLEX) 652 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 653 #else 654 sum += (*v)*(*v); v++; 655 #endif 656 } 657 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 658 *nrm = sqrt(*nrm); 659 } else if (type == NORM_1) { /* max column sum */ 660 PetscReal *tmp,*tmp2; 661 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 662 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 663 tmp2 = tmp + mat->cmap.N; 664 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 665 v = amat->a; jj = amat->j; 666 for (i=0; i<amat->nz; i++) { 667 for (j=0; j<bs; j++){ 668 col = bs*(cstart + *jj) + j; /* column index */ 669 for (row=0; row<bs; row++){ 670 tmp[col] += PetscAbsScalar(*v); v++; 671 } 672 } 673 jj++; 674 } 675 v = bmat->a; jj = bmat->j; 676 for (i=0; i<bmat->nz; i++) { 677 for (j=0; j<bs; j++){ 678 col = bs*garray[*jj] + j; 679 for (row=0; row<bs; row++){ 680 tmp[col] += PetscAbsScalar(*v); v++; 681 } 682 } 683 jj++; 684 } 685 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 686 *nrm = 0.0; 687 for (j=0; j<mat->cmap.N; j++) { 688 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 689 } 690 ierr = PetscFree(tmp);CHKERRQ(ierr); 691 } else if (type == NORM_INFINITY) { /* max row sum */ 692 PetscReal *sums; 693 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 694 sum = 0.0; 695 for (j=0; j<amat->mbs; j++) { 696 for (row=0; row<bs; row++) sums[row] = 0.0; 697 v = amat->a + bs2*amat->i[j]; 698 nz = amat->i[j+1]-amat->i[j]; 699 for (i=0; i<nz; i++) { 700 for (col=0; col<bs; col++){ 701 for (row=0; row<bs; row++){ 702 sums[row] += PetscAbsScalar(*v); v++; 703 } 704 } 705 } 706 v = bmat->a + bs2*bmat->i[j]; 707 nz = bmat->i[j+1]-bmat->i[j]; 708 for (i=0; i<nz; i++) { 709 for (col=0; col<bs; col++){ 710 for (row=0; row<bs; row++){ 711 sums[row] += PetscAbsScalar(*v); v++; 712 } 713 } 714 } 715 for (row=0; row<bs; row++){ 716 if (sums[row] > sum) sum = sums[row]; 717 } 718 } 719 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 720 ierr = PetscFree(sums);CHKERRQ(ierr); 721 } else { 722 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 } 725 PetscFunctionReturn(0); 726 } 727 728 /* 729 Creates the hash table, and sets the table 730 This table is created only once. 731 If new entried need to be added to the matrix 732 then the hash table has to be destroyed and 733 recreated. 734 */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 737 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 738 { 739 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 740 Mat A = baij->A,B=baij->B; 741 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 742 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 743 PetscErrorCode ierr; 744 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 745 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 746 PetscInt *HT,key; 747 MatScalar **HD; 748 PetscReal tmp; 749 #if defined(PETSC_USE_INFO) 750 PetscInt ct=0,max=0; 751 #endif 752 753 PetscFunctionBegin; 754 baij->ht_size=(PetscInt)(factor*nz); 755 size = baij->ht_size; 756 757 if (baij->ht) { 758 PetscFunctionReturn(0); 759 } 760 761 /* Allocate Memory for Hash Table */ 762 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 763 baij->ht = (PetscInt*)(baij->hd + size); 764 HD = baij->hd; 765 HT = baij->ht; 766 767 768 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 769 770 771 /* Loop Over A */ 772 for (i=0; i<a->mbs; i++) { 773 for (j=ai[i]; j<ai[i+1]; j++) { 774 row = i+rstart; 775 col = aj[j]+cstart; 776 777 key = row*Nbs + col + 1; 778 h1 = HASH(size,key,tmp); 779 for (k=0; k<size; k++){ 780 if (!HT[(h1+k)%size]) { 781 HT[(h1+k)%size] = key; 782 HD[(h1+k)%size] = a->a + j*bs2; 783 break; 784 #if defined(PETSC_USE_INFO) 785 } else { 786 ct++; 787 #endif 788 } 789 } 790 #if defined(PETSC_USE_INFO) 791 if (k> max) max = k; 792 #endif 793 } 794 } 795 /* Loop Over B */ 796 for (i=0; i<b->mbs; i++) { 797 for (j=bi[i]; j<bi[i+1]; j++) { 798 row = i+rstart; 799 col = garray[bj[j]]; 800 key = row*Nbs + col + 1; 801 h1 = HASH(size,key,tmp); 802 for (k=0; k<size; k++){ 803 if (!HT[(h1+k)%size]) { 804 HT[(h1+k)%size] = key; 805 HD[(h1+k)%size] = b->a + j*bs2; 806 break; 807 #if defined(PETSC_USE_INFO) 808 } else { 809 ct++; 810 #endif 811 } 812 } 813 #if defined(PETSC_USE_INFO) 814 if (k> max) max = k; 815 #endif 816 } 817 } 818 819 /* Print Summary */ 820 #if defined(PETSC_USE_INFO) 821 for (i=0,j=0; i<size; i++) { 822 if (HT[i]) {j++;} 823 } 824 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 825 #endif 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 831 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 832 { 833 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 834 PetscErrorCode ierr; 835 PetscInt nstash,reallocs; 836 InsertMode addv; 837 838 PetscFunctionBegin; 839 if (baij->donotstash) { 840 PetscFunctionReturn(0); 841 } 842 843 /* make sure all processors are either in INSERTMODE or ADDMODE */ 844 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 845 if (addv == (ADD_VALUES|INSERT_VALUES)) { 846 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 847 } 848 mat->insertmode = addv; /* in case this processor had no cache */ 849 850 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 851 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 852 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 853 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 854 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 855 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 861 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 862 { 863 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 864 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 865 PetscErrorCode ierr; 866 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 867 PetscInt *row,*col; 868 PetscTruth r1,r2,r3,other_disassembled; 869 MatScalar *val; 870 InsertMode addv = mat->insertmode; 871 PetscMPIInt n; 872 873 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 874 PetscFunctionBegin; 875 if (!baij->donotstash) { 876 while (1) { 877 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 878 if (!flg) break; 879 880 for (i=0; i<n;) { 881 /* Now identify the consecutive vals belonging to the same row */ 882 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 883 if (j < n) ncols = j-i; 884 else ncols = n-i; 885 /* Now assemble all these values with a single function call */ 886 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 887 i = j; 888 } 889 } 890 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 891 /* Now process the block-stash. Since the values are stashed column-oriented, 892 set the roworiented flag to column oriented, and after MatSetValues() 893 restore the original flags */ 894 r1 = baij->roworiented; 895 r2 = a->roworiented; 896 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 897 baij->roworiented = PETSC_FALSE; 898 a->roworiented = PETSC_FALSE; 899 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 900 while (1) { 901 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 902 if (!flg) break; 903 904 for (i=0; i<n;) { 905 /* Now identify the consecutive vals belonging to the same row */ 906 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 907 if (j < n) ncols = j-i; 908 else ncols = n-i; 909 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 910 i = j; 911 } 912 } 913 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 914 baij->roworiented = r1; 915 a->roworiented = r2; 916 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 917 } 918 919 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 920 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 921 922 /* determine if any processor has disassembled, if so we must 923 also disassemble ourselfs, in order that we may reassemble. */ 924 /* 925 if nonzero structure of submatrix B cannot change then we know that 926 no processor disassembled thus we can skip this stuff 927 */ 928 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 929 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 930 if (mat->was_assembled && !other_disassembled) { 931 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 932 } 933 } 934 935 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 936 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 937 } 938 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 939 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 940 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 941 942 #if defined(PETSC_USE_INFO) 943 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 944 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 945 baij->ht_total_ct = 0; 946 baij->ht_insert_ct = 0; 947 } 948 #endif 949 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 950 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 951 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 952 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 953 } 954 955 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 956 baij->rowvalues = 0; 957 PetscFunctionReturn(0); 958 } 959 960 #undef __FUNCT__ 961 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 962 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 963 { 964 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 965 PetscErrorCode ierr; 966 PetscMPIInt size = baij->size,rank = baij->rank; 967 PetscInt bs = mat->rmap.bs; 968 PetscTruth iascii,isdraw; 969 PetscViewer sviewer; 970 PetscViewerFormat format; 971 972 PetscFunctionBegin; 973 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 974 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 975 if (iascii) { 976 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 977 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 978 MatInfo info; 979 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 980 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 981 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 982 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 983 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 984 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 985 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 986 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 987 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 988 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 989 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 990 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } else if (format == PETSC_VIEWER_ASCII_INFO) { 993 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 996 PetscFunctionReturn(0); 997 } 998 } 999 1000 if (isdraw) { 1001 PetscDraw draw; 1002 PetscTruth isnull; 1003 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1004 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1005 } 1006 1007 if (size == 1) { 1008 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1009 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1010 } else { 1011 /* assemble the entire matrix onto first processor. */ 1012 Mat A; 1013 Mat_SeqBAIJ *Aloc; 1014 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1015 MatScalar *a; 1016 1017 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1018 /* Perhaps this should be the type of mat? */ 1019 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1020 if (!rank) { 1021 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1022 } else { 1023 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1024 } 1025 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1026 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1027 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1028 1029 /* copy over the A part */ 1030 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1031 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1032 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1033 1034 for (i=0; i<mbs; i++) { 1035 rvals[0] = bs*(baij->rstartbs + i); 1036 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1037 for (j=ai[i]; j<ai[i+1]; j++) { 1038 col = (baij->cstartbs+aj[j])*bs; 1039 for (k=0; k<bs; k++) { 1040 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1041 col++; a += bs; 1042 } 1043 } 1044 } 1045 /* copy over the B part */ 1046 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1047 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1048 for (i=0; i<mbs; i++) { 1049 rvals[0] = bs*(baij->rstartbs + i); 1050 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1051 for (j=ai[i]; j<ai[i+1]; j++) { 1052 col = baij->garray[aj[j]]*bs; 1053 for (k=0; k<bs; k++) { 1054 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1055 col++; a += bs; 1056 } 1057 } 1058 } 1059 ierr = PetscFree(rvals);CHKERRQ(ierr); 1060 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1061 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1062 /* 1063 Everyone has to call to draw the matrix since the graphics waits are 1064 synchronized across all processors that share the PetscDraw object 1065 */ 1066 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1067 if (!rank) { 1068 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1069 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1070 } 1071 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1072 ierr = MatDestroy(A);CHKERRQ(ierr); 1073 } 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatView_MPIBAIJ" 1079 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1080 { 1081 PetscErrorCode ierr; 1082 PetscTruth iascii,isdraw,issocket,isbinary; 1083 1084 PetscFunctionBegin; 1085 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1086 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1087 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1088 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1089 if (iascii || isdraw || issocket || isbinary) { 1090 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1091 } else { 1092 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1093 } 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1099 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1100 { 1101 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 #if defined(PETSC_USE_LOG) 1106 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 1107 #endif 1108 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1109 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1110 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1111 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1112 #if defined (PETSC_USE_CTABLE) 1113 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1114 #else 1115 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1116 #endif 1117 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1118 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1119 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1120 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1121 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1122 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1123 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1124 ierr = PetscFree(baij);CHKERRQ(ierr); 1125 1126 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1133 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatMult_MPIBAIJ" 1139 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1140 { 1141 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1142 PetscErrorCode ierr; 1143 PetscInt nt; 1144 1145 PetscFunctionBegin; 1146 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1147 if (nt != A->cmap.n) { 1148 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1149 } 1150 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1151 if (nt != A->rmap.n) { 1152 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1153 } 1154 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1155 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1156 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1157 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1163 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1164 { 1165 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1166 PetscErrorCode ierr; 1167 1168 PetscFunctionBegin; 1169 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1170 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1171 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1172 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1178 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1179 { 1180 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1181 PetscErrorCode ierr; 1182 PetscTruth merged; 1183 1184 PetscFunctionBegin; 1185 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1186 /* do nondiagonal part */ 1187 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1188 if (!merged) { 1189 /* send it on its way */ 1190 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1191 /* do local part */ 1192 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1193 /* receive remote parts: note this assumes the values are not actually */ 1194 /* inserted in yy until the next line */ 1195 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1196 } else { 1197 /* do local part */ 1198 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1199 /* send it on its way */ 1200 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1201 /* values actually were received in the Begin() but we need to call this nop */ 1202 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1209 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1210 { 1211 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1212 PetscErrorCode ierr; 1213 1214 PetscFunctionBegin; 1215 /* do nondiagonal part */ 1216 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1217 /* send it on its way */ 1218 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1219 /* do local part */ 1220 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1221 /* receive remote parts: note this assumes the values are not actually */ 1222 /* inserted in yy until the next line, which is true for my implementation*/ 1223 /* but is not perhaps always true. */ 1224 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /* 1229 This only works correctly for square matrices where the subblock A->A is the 1230 diagonal block 1231 */ 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1234 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1235 { 1236 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1241 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatScale_MPIBAIJ" 1247 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1248 { 1249 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1250 PetscErrorCode ierr; 1251 1252 PetscFunctionBegin; 1253 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1254 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1260 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1263 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264 PetscErrorCode ierr; 1265 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1266 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1267 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1268 1269 PetscFunctionBegin; 1270 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271 mat->getrowactive = PETSC_TRUE; 1272 1273 if (!mat->rowvalues && (idx || v)) { 1274 /* 1275 allocate enough space to hold information from the longest row. 1276 */ 1277 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1278 PetscInt max = 1,mbs = mat->mbs,tmp; 1279 for (i=0; i<mbs; i++) { 1280 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1281 if (max < tmp) { max = tmp; } 1282 } 1283 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1284 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1285 } 1286 1287 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1288 lrow = row - brstart; 1289 1290 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1291 if (!v) {pvA = 0; pvB = 0;} 1292 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1293 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1294 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1295 nztot = nzA + nzB; 1296 1297 cmap = mat->garray; 1298 if (v || idx) { 1299 if (nztot) { 1300 /* Sort by increasing column numbers, assuming A and B already sorted */ 1301 PetscInt imark = -1; 1302 if (v) { 1303 *v = v_p = mat->rowvalues; 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1306 else break; 1307 } 1308 imark = i; 1309 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1310 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1311 } 1312 if (idx) { 1313 *idx = idx_p = mat->rowindices; 1314 if (imark > -1) { 1315 for (i=0; i<imark; i++) { 1316 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1317 } 1318 } else { 1319 for (i=0; i<nzB; i++) { 1320 if (cmap[cworkB[i]/bs] < cstart) 1321 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1322 else break; 1323 } 1324 imark = i; 1325 } 1326 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1327 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1328 } 1329 } else { 1330 if (idx) *idx = 0; 1331 if (v) *v = 0; 1332 } 1333 } 1334 *nz = nztot; 1335 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1336 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1342 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1343 { 1344 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1345 1346 PetscFunctionBegin; 1347 if (!baij->getrowactive) { 1348 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1349 } 1350 baij->getrowactive = PETSC_FALSE; 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1356 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1357 { 1358 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1363 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1369 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1370 { 1371 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1372 Mat A = a->A,B = a->B; 1373 PetscErrorCode ierr; 1374 PetscReal isend[5],irecv[5]; 1375 1376 PetscFunctionBegin; 1377 info->block_size = (PetscReal)matin->rmap.bs; 1378 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1379 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1380 isend[3] = info->memory; isend[4] = info->mallocs; 1381 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1382 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1383 isend[3] += info->memory; isend[4] += info->mallocs; 1384 if (flag == MAT_LOCAL) { 1385 info->nz_used = isend[0]; 1386 info->nz_allocated = isend[1]; 1387 info->nz_unneeded = isend[2]; 1388 info->memory = isend[3]; 1389 info->mallocs = isend[4]; 1390 } else if (flag == MAT_GLOBAL_MAX) { 1391 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1392 info->nz_used = irecv[0]; 1393 info->nz_allocated = irecv[1]; 1394 info->nz_unneeded = irecv[2]; 1395 info->memory = irecv[3]; 1396 info->mallocs = irecv[4]; 1397 } else if (flag == MAT_GLOBAL_SUM) { 1398 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1399 info->nz_used = irecv[0]; 1400 info->nz_allocated = irecv[1]; 1401 info->nz_unneeded = irecv[2]; 1402 info->memory = irecv[3]; 1403 info->mallocs = irecv[4]; 1404 } else { 1405 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1406 } 1407 info->rows_global = (PetscReal)A->rmap.N; 1408 info->columns_global = (PetscReal)A->cmap.N; 1409 info->rows_local = (PetscReal)A->rmap.N; 1410 info->columns_local = (PetscReal)A->cmap.N; 1411 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1412 info->fill_ratio_needed = 0; 1413 info->factor_mallocs = 0; 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1419 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1420 { 1421 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1422 PetscErrorCode ierr; 1423 1424 PetscFunctionBegin; 1425 switch (op) { 1426 case MAT_NEW_NONZERO_LOCATIONS: 1427 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1428 case MAT_KEEP_ZEROED_ROWS: 1429 case MAT_NEW_NONZERO_LOCATION_ERR: 1430 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1431 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1432 break; 1433 case MAT_ROW_ORIENTED: 1434 a->roworiented = flg; 1435 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1436 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1437 break; 1438 case MAT_NEW_DIAGONALS: 1439 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1440 break; 1441 case MAT_IGNORE_OFF_PROC_ENTRIES: 1442 a->donotstash = flg; 1443 break; 1444 case MAT_USE_HASH_TABLE: 1445 a->ht_flag = flg; 1446 break; 1447 case MAT_SYMMETRIC: 1448 case MAT_STRUCTURALLY_SYMMETRIC: 1449 case MAT_HERMITIAN: 1450 case MAT_SYMMETRY_ETERNAL: 1451 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1452 break; 1453 default: 1454 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1461 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1462 { 1463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1464 Mat_SeqBAIJ *Aloc; 1465 Mat B; 1466 PetscErrorCode ierr; 1467 PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1468 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1469 MatScalar *a; 1470 1471 PetscFunctionBegin; 1472 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1473 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1474 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1475 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1476 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1477 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1478 } else { 1479 B = *matout; 1480 } 1481 1482 /* copy over the A part */ 1483 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1484 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1485 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1486 1487 for (i=0; i<mbs; i++) { 1488 rvals[0] = bs*(baij->rstartbs + i); 1489 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1490 for (j=ai[i]; j<ai[i+1]; j++) { 1491 col = (baij->cstartbs+aj[j])*bs; 1492 for (k=0; k<bs; k++) { 1493 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1494 col++; a += bs; 1495 } 1496 } 1497 } 1498 /* copy over the B part */ 1499 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1500 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1501 for (i=0; i<mbs; i++) { 1502 rvals[0] = bs*(baij->rstartbs + i); 1503 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1504 for (j=ai[i]; j<ai[i+1]; j++) { 1505 col = baij->garray[aj[j]]*bs; 1506 for (k=0; k<bs; k++) { 1507 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1508 col++; a += bs; 1509 } 1510 } 1511 } 1512 ierr = PetscFree(rvals);CHKERRQ(ierr); 1513 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1514 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1515 1516 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1517 *matout = B; 1518 } else { 1519 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1526 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1527 { 1528 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1529 Mat a = baij->A,b = baij->B; 1530 PetscErrorCode ierr; 1531 PetscInt s1,s2,s3; 1532 1533 PetscFunctionBegin; 1534 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1535 if (rr) { 1536 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1537 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1538 /* Overlap communication with computation. */ 1539 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1540 } 1541 if (ll) { 1542 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1543 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1544 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1545 } 1546 /* scale the diagonal block */ 1547 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1548 1549 if (rr) { 1550 /* Do a scatter end and then right scale the off-diagonal block */ 1551 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1553 } 1554 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1560 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1561 { 1562 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1563 PetscErrorCode ierr; 1564 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1565 PetscInt i,*owners = A->rmap.range; 1566 PetscInt *nprocs,j,idx,nsends,row; 1567 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1568 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1569 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1570 MPI_Comm comm = ((PetscObject)A)->comm; 1571 MPI_Request *send_waits,*recv_waits; 1572 MPI_Status recv_status,*send_status; 1573 #if defined(PETSC_DEBUG) 1574 PetscTruth found = PETSC_FALSE; 1575 #endif 1576 1577 PetscFunctionBegin; 1578 /* first count number of contributors to each processor */ 1579 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1580 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1581 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1582 j = 0; 1583 for (i=0; i<N; i++) { 1584 if (lastidx > (idx = rows[i])) j = 0; 1585 lastidx = idx; 1586 for (; j<size; j++) { 1587 if (idx >= owners[j] && idx < owners[j+1]) { 1588 nprocs[2*j]++; 1589 nprocs[2*j+1] = 1; 1590 owner[i] = j; 1591 #if defined(PETSC_DEBUG) 1592 found = PETSC_TRUE; 1593 #endif 1594 break; 1595 } 1596 } 1597 #if defined(PETSC_DEBUG) 1598 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1599 found = PETSC_FALSE; 1600 #endif 1601 } 1602 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1603 1604 /* inform other processors of number of messages and max length*/ 1605 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1606 1607 /* post receives: */ 1608 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1609 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1610 for (i=0; i<nrecvs; i++) { 1611 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1612 } 1613 1614 /* do sends: 1615 1) starts[i] gives the starting index in svalues for stuff going to 1616 the ith processor 1617 */ 1618 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1619 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1620 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1621 starts[0] = 0; 1622 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1623 for (i=0; i<N; i++) { 1624 svalues[starts[owner[i]]++] = rows[i]; 1625 } 1626 1627 starts[0] = 0; 1628 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1629 count = 0; 1630 for (i=0; i<size; i++) { 1631 if (nprocs[2*i+1]) { 1632 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1633 } 1634 } 1635 ierr = PetscFree(starts);CHKERRQ(ierr); 1636 1637 base = owners[rank]; 1638 1639 /* wait on receives */ 1640 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1641 source = lens + nrecvs; 1642 count = nrecvs; slen = 0; 1643 while (count) { 1644 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1645 /* unpack receives into our local space */ 1646 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1647 source[imdex] = recv_status.MPI_SOURCE; 1648 lens[imdex] = n; 1649 slen += n; 1650 count--; 1651 } 1652 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1653 1654 /* move the data into the send scatter */ 1655 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1656 count = 0; 1657 for (i=0; i<nrecvs; i++) { 1658 values = rvalues + i*nmax; 1659 for (j=0; j<lens[i]; j++) { 1660 lrows[count++] = values[j] - base; 1661 } 1662 } 1663 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1664 ierr = PetscFree(lens);CHKERRQ(ierr); 1665 ierr = PetscFree(owner);CHKERRQ(ierr); 1666 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1667 1668 /* actually zap the local rows */ 1669 /* 1670 Zero the required rows. If the "diagonal block" of the matrix 1671 is square and the user wishes to set the diagonal we use separate 1672 code so that MatSetValues() is not called for each diagonal allocating 1673 new memory, thus calling lots of mallocs and slowing things down. 1674 1675 Contributed by: Matthew Knepley 1676 */ 1677 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1678 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1679 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1680 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1681 } else if (diag != 0.0) { 1682 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1683 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1684 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1685 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1686 } 1687 for (i=0; i<slen; i++) { 1688 row = lrows[i] + rstart_bs; 1689 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1690 } 1691 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1692 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 } else { 1694 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1695 } 1696 1697 ierr = PetscFree(lrows);CHKERRQ(ierr); 1698 1699 /* wait on sends */ 1700 if (nsends) { 1701 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1702 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1703 ierr = PetscFree(send_status);CHKERRQ(ierr); 1704 } 1705 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1706 ierr = PetscFree(svalues);CHKERRQ(ierr); 1707 1708 PetscFunctionReturn(0); 1709 } 1710 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1713 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1714 { 1715 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 1723 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatEqual_MPIBAIJ" 1727 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1728 { 1729 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1730 Mat a,b,c,d; 1731 PetscTruth flg; 1732 PetscErrorCode ierr; 1733 1734 PetscFunctionBegin; 1735 a = matA->A; b = matA->B; 1736 c = matB->A; d = matB->B; 1737 1738 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1739 if (flg) { 1740 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1741 } 1742 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatCopy_MPIBAIJ" 1748 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1749 { 1750 PetscErrorCode ierr; 1751 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1752 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1753 1754 PetscFunctionBegin; 1755 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1756 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1757 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1758 } else { 1759 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1760 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1761 } 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1767 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1768 { 1769 PetscErrorCode ierr; 1770 1771 PetscFunctionBegin; 1772 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #include "petscblaslapack.h" 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1779 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1780 { 1781 PetscErrorCode ierr; 1782 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1783 PetscBLASInt bnz,one=1; 1784 Mat_SeqBAIJ *x,*y; 1785 1786 PetscFunctionBegin; 1787 if (str == SAME_NONZERO_PATTERN) { 1788 PetscScalar alpha = a; 1789 x = (Mat_SeqBAIJ *)xx->A->data; 1790 y = (Mat_SeqBAIJ *)yy->A->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 x = (Mat_SeqBAIJ *)xx->B->data; 1794 y = (Mat_SeqBAIJ *)yy->B->data; 1795 bnz = PetscBLASIntCast(x->nz); 1796 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1797 } else { 1798 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1799 } 1800 PetscFunctionReturn(0); 1801 } 1802 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1805 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1806 { 1807 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1808 PetscErrorCode ierr; 1809 1810 PetscFunctionBegin; 1811 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1812 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1818 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1819 { 1820 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1825 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 /* -------------------------------------------------------------------*/ 1830 static struct _MatOps MatOps_Values = { 1831 MatSetValues_MPIBAIJ, 1832 MatGetRow_MPIBAIJ, 1833 MatRestoreRow_MPIBAIJ, 1834 MatMult_MPIBAIJ, 1835 /* 4*/ MatMultAdd_MPIBAIJ, 1836 MatMultTranspose_MPIBAIJ, 1837 MatMultTransposeAdd_MPIBAIJ, 1838 0, 1839 0, 1840 0, 1841 /*10*/ 0, 1842 0, 1843 0, 1844 0, 1845 MatTranspose_MPIBAIJ, 1846 /*15*/ MatGetInfo_MPIBAIJ, 1847 MatEqual_MPIBAIJ, 1848 MatGetDiagonal_MPIBAIJ, 1849 MatDiagonalScale_MPIBAIJ, 1850 MatNorm_MPIBAIJ, 1851 /*20*/ MatAssemblyBegin_MPIBAIJ, 1852 MatAssemblyEnd_MPIBAIJ, 1853 0, 1854 MatSetOption_MPIBAIJ, 1855 MatZeroEntries_MPIBAIJ, 1856 /*25*/ MatZeroRows_MPIBAIJ, 1857 0, 1858 0, 1859 0, 1860 0, 1861 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1862 0, 1863 0, 1864 0, 1865 0, 1866 /*35*/ MatDuplicate_MPIBAIJ, 1867 0, 1868 0, 1869 0, 1870 0, 1871 /*40*/ MatAXPY_MPIBAIJ, 1872 MatGetSubMatrices_MPIBAIJ, 1873 MatIncreaseOverlap_MPIBAIJ, 1874 MatGetValues_MPIBAIJ, 1875 MatCopy_MPIBAIJ, 1876 /*45*/ 0, 1877 MatScale_MPIBAIJ, 1878 0, 1879 0, 1880 0, 1881 /*50*/ 0, 1882 0, 1883 0, 1884 0, 1885 0, 1886 /*55*/ 0, 1887 0, 1888 MatSetUnfactored_MPIBAIJ, 1889 0, 1890 MatSetValuesBlocked_MPIBAIJ, 1891 /*60*/ 0, 1892 MatDestroy_MPIBAIJ, 1893 MatView_MPIBAIJ, 1894 0, 1895 0, 1896 /*65*/ 0, 1897 0, 1898 0, 1899 0, 1900 0, 1901 /*70*/ MatGetRowMaxAbs_MPIBAIJ, 1902 0, 1903 0, 1904 0, 1905 0, 1906 /*75*/ 0, 1907 0, 1908 0, 1909 0, 1910 0, 1911 /*80*/ 0, 1912 0, 1913 0, 1914 0, 1915 MatLoad_MPIBAIJ, 1916 /*85*/ 0, 1917 0, 1918 0, 1919 0, 1920 0, 1921 /*90*/ 0, 1922 0, 1923 0, 1924 0, 1925 0, 1926 /*95*/ 0, 1927 0, 1928 0, 1929 0, 1930 0, 1931 /*100*/0, 1932 0, 1933 0, 1934 0, 1935 0, 1936 /*105*/0, 1937 MatRealPart_MPIBAIJ, 1938 MatImaginaryPart_MPIBAIJ}; 1939 1940 1941 EXTERN_C_BEGIN 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1944 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1945 { 1946 PetscFunctionBegin; 1947 *a = ((Mat_MPIBAIJ *)A->data)->A; 1948 *iscopy = PETSC_FALSE; 1949 PetscFunctionReturn(0); 1950 } 1951 EXTERN_C_END 1952 1953 EXTERN_C_BEGIN 1954 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 1955 EXTERN_C_END 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 1959 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 1960 { 1961 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 1962 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 1963 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 1964 const PetscInt *JJ; 1965 PetscScalar *values; 1966 PetscErrorCode ierr; 1967 1968 PetscFunctionBegin; 1969 #if defined(PETSC_OPT_g) 1970 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 1971 #endif 1972 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1973 o_nnz = d_nnz + m; 1974 1975 for (i=0; i<m; i++) { 1976 nnz = Ii[i+1]- Ii[i]; 1977 JJ = J + Ii[i]; 1978 nnz_max = PetscMax(nnz_max,nnz); 1979 #if defined(PETSC_OPT_g) 1980 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 1981 #endif 1982 for (j=0; j<nnz; j++) { 1983 if (*JJ >= cstart) break; 1984 JJ++; 1985 } 1986 d = 0; 1987 for (; j<nnz; j++) { 1988 if (*JJ++ >= cend) break; 1989 d++; 1990 } 1991 d_nnz[i] = d; 1992 o_nnz[i] = nnz - d; 1993 } 1994 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1995 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1996 1997 if (v) values = (PetscScalar*)v; 1998 else { 1999 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2000 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2001 } 2002 2003 for (i=0; i<m; i++) { 2004 ii = i + rstart; 2005 nnz = Ii[i+1]- Ii[i]; 2006 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2007 } 2008 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2009 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2010 2011 if (!v) { 2012 ierr = PetscFree(values);CHKERRQ(ierr); 2013 } 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2019 /*@C 2020 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2021 (the default parallel PETSc format). 2022 2023 Collective on MPI_Comm 2024 2025 Input Parameters: 2026 + A - the matrix 2027 . i - the indices into j for the start of each local row (starts with zero) 2028 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2029 - v - optional values in the matrix 2030 2031 Level: developer 2032 2033 .keywords: matrix, aij, compressed row, sparse, parallel 2034 2035 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2036 @*/ 2037 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2038 { 2039 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2040 2041 PetscFunctionBegin; 2042 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2043 if (f) { 2044 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2045 } 2046 PetscFunctionReturn(0); 2047 } 2048 2049 EXTERN_C_BEGIN 2050 #undef __FUNCT__ 2051 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2052 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2053 { 2054 Mat_MPIBAIJ *b; 2055 PetscErrorCode ierr; 2056 PetscInt i; 2057 2058 PetscFunctionBegin; 2059 B->preallocated = PETSC_TRUE; 2060 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2061 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2062 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2063 2064 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2065 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2066 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2067 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2068 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2069 2070 B->rmap.bs = bs; 2071 B->cmap.bs = bs; 2072 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2073 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2074 2075 if (d_nnz) { 2076 for (i=0; i<B->rmap.n/bs; i++) { 2077 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2078 } 2079 } 2080 if (o_nnz) { 2081 for (i=0; i<B->rmap.n/bs; i++) { 2082 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2083 } 2084 } 2085 2086 b = (Mat_MPIBAIJ*)B->data; 2087 b->bs2 = bs*bs; 2088 b->mbs = B->rmap.n/bs; 2089 b->nbs = B->cmap.n/bs; 2090 b->Mbs = B->rmap.N/bs; 2091 b->Nbs = B->cmap.N/bs; 2092 2093 for (i=0; i<=b->size; i++) { 2094 b->rangebs[i] = B->rmap.range[i]/bs; 2095 } 2096 b->rstartbs = B->rmap.rstart/bs; 2097 b->rendbs = B->rmap.rend/bs; 2098 b->cstartbs = B->cmap.rstart/bs; 2099 b->cendbs = B->cmap.rend/bs; 2100 2101 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2102 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2103 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2104 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2105 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2106 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2107 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2108 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2109 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2110 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2111 2112 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2113 2114 PetscFunctionReturn(0); 2115 } 2116 EXTERN_C_END 2117 2118 EXTERN_C_BEGIN 2119 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2120 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2121 EXTERN_C_END 2122 2123 /*MC 2124 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2125 2126 Options Database Keys: 2127 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2128 . -mat_block_size <bs> - set the blocksize used to store the matrix 2129 - -mat_use_hash_table <fact> 2130 2131 Level: beginner 2132 2133 .seealso: MatCreateMPIBAIJ 2134 M*/ 2135 2136 EXTERN_C_BEGIN 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatCreate_MPIBAIJ" 2139 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2140 { 2141 Mat_MPIBAIJ *b; 2142 PetscErrorCode ierr; 2143 PetscTruth flg; 2144 2145 PetscFunctionBegin; 2146 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2147 B->data = (void*)b; 2148 2149 2150 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2151 B->mapping = 0; 2152 B->factor = 0; 2153 B->assembled = PETSC_FALSE; 2154 2155 B->insertmode = NOT_SET_VALUES; 2156 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2157 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2158 2159 /* build local table of row and column ownerships */ 2160 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2161 2162 /* build cache for off array entries formed */ 2163 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2164 b->donotstash = PETSC_FALSE; 2165 b->colmap = PETSC_NULL; 2166 b->garray = PETSC_NULL; 2167 b->roworiented = PETSC_TRUE; 2168 2169 /* stuff used in block assembly */ 2170 b->barray = 0; 2171 2172 /* stuff used for matrix vector multiply */ 2173 b->lvec = 0; 2174 b->Mvctx = 0; 2175 2176 /* stuff for MatGetRow() */ 2177 b->rowindices = 0; 2178 b->rowvalues = 0; 2179 b->getrowactive = PETSC_FALSE; 2180 2181 /* hash table stuff */ 2182 b->ht = 0; 2183 b->hd = 0; 2184 b->ht_size = 0; 2185 b->ht_flag = PETSC_FALSE; 2186 b->ht_fact = 0; 2187 b->ht_total_ct = 0; 2188 b->ht_insert_ct = 0; 2189 2190 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2191 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2192 if (flg) { 2193 PetscReal fact = 1.39; 2194 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2195 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2196 if (fact <= 1.0) fact = 1.39; 2197 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2198 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2199 } 2200 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2201 2202 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2203 "MatStoreValues_MPIBAIJ", 2204 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2205 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2206 "MatRetrieveValues_MPIBAIJ", 2207 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2208 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2209 "MatGetDiagonalBlock_MPIBAIJ", 2210 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2211 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2212 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2213 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2214 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2215 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2216 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2217 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2218 "MatDiagonalScaleLocal_MPIBAIJ", 2219 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2220 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2221 "MatSetHashTableFactor_MPIBAIJ", 2222 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2223 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2224 PetscFunctionReturn(0); 2225 } 2226 EXTERN_C_END 2227 2228 /*MC 2229 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2230 2231 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2232 and MATMPIBAIJ otherwise. 2233 2234 Options Database Keys: 2235 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2236 2237 Level: beginner 2238 2239 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2240 M*/ 2241 2242 EXTERN_C_BEGIN 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "MatCreate_BAIJ" 2245 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2246 { 2247 PetscErrorCode ierr; 2248 PetscMPIInt size; 2249 2250 PetscFunctionBegin; 2251 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2252 if (size == 1) { 2253 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2254 } else { 2255 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2256 } 2257 PetscFunctionReturn(0); 2258 } 2259 EXTERN_C_END 2260 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2263 /*@C 2264 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2265 (block compressed row). For good matrix assembly performance 2266 the user should preallocate the matrix storage by setting the parameters 2267 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2268 performance can be increased by more than a factor of 50. 2269 2270 Collective on Mat 2271 2272 Input Parameters: 2273 + A - the matrix 2274 . bs - size of blockk 2275 . d_nz - number of block nonzeros per block row in diagonal portion of local 2276 submatrix (same for all local rows) 2277 . d_nnz - array containing the number of block nonzeros in the various block rows 2278 of the in diagonal portion of the local (possibly different for each block 2279 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2280 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2281 submatrix (same for all local rows). 2282 - o_nnz - array containing the number of nonzeros in the various block rows of the 2283 off-diagonal portion of the local submatrix (possibly different for 2284 each block row) or PETSC_NULL. 2285 2286 If the *_nnz parameter is given then the *_nz parameter is ignored 2287 2288 Options Database Keys: 2289 + -mat_block_size - size of the blocks to use 2290 - -mat_use_hash_table <fact> 2291 2292 Notes: 2293 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2294 than it must be used on all processors that share the object for that argument. 2295 2296 Storage Information: 2297 For a square global matrix we define each processor's diagonal portion 2298 to be its local rows and the corresponding columns (a square submatrix); 2299 each processor's off-diagonal portion encompasses the remainder of the 2300 local matrix (a rectangular submatrix). 2301 2302 The user can specify preallocated storage for the diagonal part of 2303 the local submatrix with either d_nz or d_nnz (not both). Set 2304 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2305 memory allocation. Likewise, specify preallocated storage for the 2306 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2307 2308 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2309 the figure below we depict these three local rows and all columns (0-11). 2310 2311 .vb 2312 0 1 2 3 4 5 6 7 8 9 10 11 2313 ------------------- 2314 row 3 | o o o d d d o o o o o o 2315 row 4 | o o o d d d o o o o o o 2316 row 5 | o o o d d d o o o o o o 2317 ------------------- 2318 .ve 2319 2320 Thus, any entries in the d locations are stored in the d (diagonal) 2321 submatrix, and any entries in the o locations are stored in the 2322 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2323 stored simply in the MATSEQBAIJ format for compressed row storage. 2324 2325 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2326 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2327 In general, for PDE problems in which most nonzeros are near the diagonal, 2328 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2329 or you will get TERRIBLE performance; see the users' manual chapter on 2330 matrices. 2331 2332 You can call MatGetInfo() to get information on how effective the preallocation was; 2333 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2334 You can also run with the option -info and look for messages with the string 2335 malloc in them to see if additional memory allocation was needed. 2336 2337 Level: intermediate 2338 2339 .keywords: matrix, block, aij, compressed row, sparse, parallel 2340 2341 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2342 @*/ 2343 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2344 { 2345 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2346 2347 PetscFunctionBegin; 2348 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2349 if (f) { 2350 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2351 } 2352 PetscFunctionReturn(0); 2353 } 2354 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "MatCreateMPIBAIJ" 2357 /*@C 2358 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2359 (block compressed row). For good matrix assembly performance 2360 the user should preallocate the matrix storage by setting the parameters 2361 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2362 performance can be increased by more than a factor of 50. 2363 2364 Collective on MPI_Comm 2365 2366 Input Parameters: 2367 + comm - MPI communicator 2368 . bs - size of blockk 2369 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2370 This value should be the same as the local size used in creating the 2371 y vector for the matrix-vector product y = Ax. 2372 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2373 This value should be the same as the local size used in creating the 2374 x vector for the matrix-vector product y = Ax. 2375 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2376 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2377 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2378 submatrix (same for all local rows) 2379 . d_nnz - array containing the number of nonzero blocks in the various block rows 2380 of the in diagonal portion of the local (possibly different for each block 2381 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2382 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2383 submatrix (same for all local rows). 2384 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2385 off-diagonal portion of the local submatrix (possibly different for 2386 each block row) or PETSC_NULL. 2387 2388 Output Parameter: 2389 . A - the matrix 2390 2391 Options Database Keys: 2392 + -mat_block_size - size of the blocks to use 2393 - -mat_use_hash_table <fact> 2394 2395 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2396 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 2397 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 2398 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2399 2400 Notes: 2401 If the *_nnz parameter is given then the *_nz parameter is ignored 2402 2403 A nonzero block is any block that as 1 or more nonzeros in it 2404 2405 The user MUST specify either the local or global matrix dimensions 2406 (possibly both). 2407 2408 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2409 than it must be used on all processors that share the object for that argument. 2410 2411 Storage Information: 2412 For a square global matrix we define each processor's diagonal portion 2413 to be its local rows and the corresponding columns (a square submatrix); 2414 each processor's off-diagonal portion encompasses the remainder of the 2415 local matrix (a rectangular submatrix). 2416 2417 The user can specify preallocated storage for the diagonal part of 2418 the local submatrix with either d_nz or d_nnz (not both). Set 2419 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2420 memory allocation. Likewise, specify preallocated storage for the 2421 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2422 2423 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2424 the figure below we depict these three local rows and all columns (0-11). 2425 2426 .vb 2427 0 1 2 3 4 5 6 7 8 9 10 11 2428 ------------------- 2429 row 3 | o o o d d d o o o o o o 2430 row 4 | o o o d d d o o o o o o 2431 row 5 | o o o d d d o o o o o o 2432 ------------------- 2433 .ve 2434 2435 Thus, any entries in the d locations are stored in the d (diagonal) 2436 submatrix, and any entries in the o locations are stored in the 2437 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2438 stored simply in the MATSEQBAIJ format for compressed row storage. 2439 2440 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2441 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2442 In general, for PDE problems in which most nonzeros are near the diagonal, 2443 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2444 or you will get TERRIBLE performance; see the users' manual chapter on 2445 matrices. 2446 2447 Level: intermediate 2448 2449 .keywords: matrix, block, aij, compressed row, sparse, parallel 2450 2451 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2452 @*/ 2453 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2454 { 2455 PetscErrorCode ierr; 2456 PetscMPIInt size; 2457 2458 PetscFunctionBegin; 2459 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2460 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2461 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2462 if (size > 1) { 2463 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2464 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2465 } else { 2466 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2467 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2468 } 2469 PetscFunctionReturn(0); 2470 } 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2474 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2475 { 2476 Mat mat; 2477 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2478 PetscErrorCode ierr; 2479 PetscInt len=0; 2480 2481 PetscFunctionBegin; 2482 *newmat = 0; 2483 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2484 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2485 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2486 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2487 2488 mat->factor = matin->factor; 2489 mat->preallocated = PETSC_TRUE; 2490 mat->assembled = PETSC_TRUE; 2491 mat->insertmode = NOT_SET_VALUES; 2492 2493 a = (Mat_MPIBAIJ*)mat->data; 2494 mat->rmap.bs = matin->rmap.bs; 2495 a->bs2 = oldmat->bs2; 2496 a->mbs = oldmat->mbs; 2497 a->nbs = oldmat->nbs; 2498 a->Mbs = oldmat->Mbs; 2499 a->Nbs = oldmat->Nbs; 2500 2501 ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2502 ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2503 2504 a->size = oldmat->size; 2505 a->rank = oldmat->rank; 2506 a->donotstash = oldmat->donotstash; 2507 a->roworiented = oldmat->roworiented; 2508 a->rowindices = 0; 2509 a->rowvalues = 0; 2510 a->getrowactive = PETSC_FALSE; 2511 a->barray = 0; 2512 a->rstartbs = oldmat->rstartbs; 2513 a->rendbs = oldmat->rendbs; 2514 a->cstartbs = oldmat->cstartbs; 2515 a->cendbs = oldmat->cendbs; 2516 2517 /* hash table stuff */ 2518 a->ht = 0; 2519 a->hd = 0; 2520 a->ht_size = 0; 2521 a->ht_flag = oldmat->ht_flag; 2522 a->ht_fact = oldmat->ht_fact; 2523 a->ht_total_ct = 0; 2524 a->ht_insert_ct = 0; 2525 2526 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2527 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2528 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2529 if (oldmat->colmap) { 2530 #if defined (PETSC_USE_CTABLE) 2531 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2532 #else 2533 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2534 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2535 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2536 #endif 2537 } else a->colmap = 0; 2538 2539 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2540 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2541 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2542 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2543 } else a->garray = 0; 2544 2545 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2546 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2547 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2548 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2549 2550 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2551 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2552 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2553 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2554 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2555 *newmat = mat; 2556 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #include "petscsys.h" 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatLoad_MPIBAIJ" 2564 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2565 { 2566 Mat A; 2567 PetscErrorCode ierr; 2568 int fd; 2569 PetscInt i,nz,j,rstart,rend; 2570 PetscScalar *vals,*buf; 2571 MPI_Comm comm = ((PetscObject)viewer)->comm; 2572 MPI_Status status; 2573 PetscMPIInt rank,size,maxnz; 2574 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2575 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2576 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2577 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2578 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2579 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2580 2581 PetscFunctionBegin; 2582 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 2583 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2584 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2585 2586 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2587 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2588 if (!rank) { 2589 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2590 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2591 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2592 } 2593 2594 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2595 M = header[1]; N = header[2]; 2596 2597 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2598 2599 /* 2600 This code adds extra rows to make sure the number of rows is 2601 divisible by the blocksize 2602 */ 2603 Mbs = M/bs; 2604 extra_rows = bs - M + bs*Mbs; 2605 if (extra_rows == bs) extra_rows = 0; 2606 else Mbs++; 2607 if (extra_rows && !rank) { 2608 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2609 } 2610 2611 /* determine ownership of all rows */ 2612 mbs = Mbs/size + ((Mbs % size) > rank); 2613 m = mbs*bs; 2614 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2615 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2616 2617 /* process 0 needs enough room for process with most rows */ 2618 if (!rank) { 2619 mmax = rowners[1]; 2620 for (i=2; i<size; i++) { 2621 mmax = PetscMax(mmax,rowners[i]); 2622 } 2623 mmax*=bs; 2624 } else mmax = m; 2625 2626 rowners[0] = 0; 2627 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2628 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2629 rstart = rowners[rank]; 2630 rend = rowners[rank+1]; 2631 2632 /* distribute row lengths to all processors */ 2633 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2634 if (!rank) { 2635 mend = m; 2636 if (size == 1) mend = mend - extra_rows; 2637 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2638 for (j=mend; j<m; j++) locrowlens[j] = 1; 2639 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2640 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2641 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2642 for (j=0; j<m; j++) { 2643 procsnz[0] += locrowlens[j]; 2644 } 2645 for (i=1; i<size; i++) { 2646 mend = browners[i+1] - browners[i]; 2647 if (i == size-1) mend = mend - extra_rows; 2648 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2649 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2650 /* calculate the number of nonzeros on each processor */ 2651 for (j=0; j<browners[i+1]-browners[i]; j++) { 2652 procsnz[i] += rowlengths[j]; 2653 } 2654 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2655 } 2656 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2657 } else { 2658 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2659 } 2660 2661 if (!rank) { 2662 /* determine max buffer needed and allocate it */ 2663 maxnz = procsnz[0]; 2664 for (i=1; i<size; i++) { 2665 maxnz = PetscMax(maxnz,procsnz[i]); 2666 } 2667 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2668 2669 /* read in my part of the matrix column indices */ 2670 nz = procsnz[0]; 2671 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2672 mycols = ibuf; 2673 if (size == 1) nz -= extra_rows; 2674 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2675 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2676 2677 /* read in every ones (except the last) and ship off */ 2678 for (i=1; i<size-1; i++) { 2679 nz = procsnz[i]; 2680 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2681 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2682 } 2683 /* read in the stuff for the last proc */ 2684 if (size != 1) { 2685 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2686 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2687 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2688 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2689 } 2690 ierr = PetscFree(cols);CHKERRQ(ierr); 2691 } else { 2692 /* determine buffer space needed for message */ 2693 nz = 0; 2694 for (i=0; i<m; i++) { 2695 nz += locrowlens[i]; 2696 } 2697 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2698 mycols = ibuf; 2699 /* receive message of column indices*/ 2700 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2701 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2702 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2703 } 2704 2705 /* loop over local rows, determining number of off diagonal entries */ 2706 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2707 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2708 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2709 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2710 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2711 rowcount = 0; nzcount = 0; 2712 for (i=0; i<mbs; i++) { 2713 dcount = 0; 2714 odcount = 0; 2715 for (j=0; j<bs; j++) { 2716 kmax = locrowlens[rowcount]; 2717 for (k=0; k<kmax; k++) { 2718 tmp = mycols[nzcount++]/bs; 2719 if (!mask[tmp]) { 2720 mask[tmp] = 1; 2721 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2722 else masked1[dcount++] = tmp; 2723 } 2724 } 2725 rowcount++; 2726 } 2727 2728 dlens[i] = dcount; 2729 odlens[i] = odcount; 2730 2731 /* zero out the mask elements we set */ 2732 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2733 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2734 } 2735 2736 /* create our matrix */ 2737 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2738 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2739 ierr = MatSetType(A,type);CHKERRQ(ierr) 2740 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2741 2742 if (!rank) { 2743 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2744 /* read in my part of the matrix numerical values */ 2745 nz = procsnz[0]; 2746 vals = buf; 2747 mycols = ibuf; 2748 if (size == 1) nz -= extra_rows; 2749 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2750 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2751 2752 /* insert into matrix */ 2753 jj = rstart*bs; 2754 for (i=0; i<m; i++) { 2755 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2756 mycols += locrowlens[i]; 2757 vals += locrowlens[i]; 2758 jj++; 2759 } 2760 /* read in other processors (except the last one) and ship out */ 2761 for (i=1; i<size-1; i++) { 2762 nz = procsnz[i]; 2763 vals = buf; 2764 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2765 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2766 } 2767 /* the last proc */ 2768 if (size != 1){ 2769 nz = procsnz[i] - extra_rows; 2770 vals = buf; 2771 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2772 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2773 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2774 } 2775 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2776 } else { 2777 /* receive numeric values */ 2778 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2779 2780 /* receive message of values*/ 2781 vals = buf; 2782 mycols = ibuf; 2783 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2784 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2785 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2786 2787 /* insert into matrix */ 2788 jj = rstart*bs; 2789 for (i=0; i<m; i++) { 2790 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2791 mycols += locrowlens[i]; 2792 vals += locrowlens[i]; 2793 jj++; 2794 } 2795 } 2796 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2797 ierr = PetscFree(buf);CHKERRQ(ierr); 2798 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2799 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2800 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2801 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2802 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2803 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2804 2805 *newmat = A; 2806 PetscFunctionReturn(0); 2807 } 2808 2809 #undef __FUNCT__ 2810 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2811 /*@ 2812 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2813 2814 Input Parameters: 2815 . mat - the matrix 2816 . fact - factor 2817 2818 Collective on Mat 2819 2820 Level: advanced 2821 2822 Notes: 2823 This can also be set by the command line option: -mat_use_hash_table <fact> 2824 2825 .keywords: matrix, hashtable, factor, HT 2826 2827 .seealso: MatSetOption() 2828 @*/ 2829 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2830 { 2831 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2832 2833 PetscFunctionBegin; 2834 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2835 if (f) { 2836 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2837 } 2838 PetscFunctionReturn(0); 2839 } 2840 2841 EXTERN_C_BEGIN 2842 #undef __FUNCT__ 2843 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2844 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2845 { 2846 Mat_MPIBAIJ *baij; 2847 2848 PetscFunctionBegin; 2849 baij = (Mat_MPIBAIJ*)mat->data; 2850 baij->ht_fact = fact; 2851 PetscFunctionReturn(0); 2852 } 2853 EXTERN_C_END 2854 2855 #undef __FUNCT__ 2856 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2857 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2858 { 2859 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2860 PetscFunctionBegin; 2861 *Ad = a->A; 2862 *Ao = a->B; 2863 *colmap = a->garray; 2864 PetscFunctionReturn(0); 2865 } 2866 2867 /* 2868 Special version for direct calls from Fortran (to eliminate two function call overheads 2869 */ 2870 #if defined(PETSC_HAVE_FORTRAN_CAPS) 2871 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 2872 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 2873 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 2874 #endif 2875 2876 #undef __FUNCT__ 2877 #define __FUNCT__ "matmpibiajsetvaluesblocked" 2878 /*@C 2879 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 2880 2881 Collective on Mat 2882 2883 Input Parameters: 2884 + mat - the matrix 2885 . min - number of input rows 2886 . im - input rows 2887 . nin - number of input columns 2888 . in - input columns 2889 . v - numerical values input 2890 - addvin - INSERT_VALUES or ADD_VALUES 2891 2892 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 2893 2894 Level: advanced 2895 2896 .seealso: MatSetValuesBlocked() 2897 @*/ 2898 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 2899 { 2900 /* convert input arguments to C version */ 2901 Mat mat = *matin; 2902 PetscInt m = *min, n = *nin; 2903 InsertMode addv = *addvin; 2904 2905 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2906 const MatScalar *value; 2907 MatScalar *barray=baij->barray; 2908 PetscTruth roworiented = baij->roworiented; 2909 PetscErrorCode ierr; 2910 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 2911 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 2912 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 2913 2914 PetscFunctionBegin; 2915 /* tasks normally handled by MatSetValuesBlocked() */ 2916 if (mat->insertmode == NOT_SET_VALUES) { 2917 mat->insertmode = addv; 2918 } 2919 #if defined(PETSC_USE_DEBUG) 2920 else if (mat->insertmode != addv) { 2921 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 2922 } 2923 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2924 #endif 2925 if (mat->assembled) { 2926 mat->was_assembled = PETSC_TRUE; 2927 mat->assembled = PETSC_FALSE; 2928 } 2929 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 2930 2931 2932 if(!barray) { 2933 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 2934 baij->barray = barray; 2935 } 2936 2937 if (roworiented) { 2938 stepval = (n-1)*bs; 2939 } else { 2940 stepval = (m-1)*bs; 2941 } 2942 for (i=0; i<m; i++) { 2943 if (im[i] < 0) continue; 2944 #if defined(PETSC_USE_DEBUG) 2945 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 2946 #endif 2947 if (im[i] >= rstart && im[i] < rend) { 2948 row = im[i] - rstart; 2949 for (j=0; j<n; j++) { 2950 /* If NumCol = 1 then a copy is not required */ 2951 if ((roworiented) && (n == 1)) { 2952 barray = (MatScalar*)v + i*bs2; 2953 } else if((!roworiented) && (m == 1)) { 2954 barray = (MatScalar*)v + j*bs2; 2955 } else { /* Here a copy is required */ 2956 if (roworiented) { 2957 value = v + i*(stepval+bs)*bs + j*bs; 2958 } else { 2959 value = v + j*(stepval+bs)*bs + i*bs; 2960 } 2961 for (ii=0; ii<bs; ii++,value+=stepval) { 2962 for (jj=0; jj<bs; jj++) { 2963 *barray++ = *value++; 2964 } 2965 } 2966 barray -=bs2; 2967 } 2968 2969 if (in[j] >= cstart && in[j] < cend){ 2970 col = in[j] - cstart; 2971 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 2972 } 2973 else if (in[j] < 0) continue; 2974 #if defined(PETSC_USE_DEBUG) 2975 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 2976 #endif 2977 else { 2978 if (mat->was_assembled) { 2979 if (!baij->colmap) { 2980 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2981 } 2982 2983 #if defined(PETSC_USE_DEBUG) 2984 #if defined (PETSC_USE_CTABLE) 2985 { PetscInt data; 2986 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 2987 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 2988 } 2989 #else 2990 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 2991 #endif 2992 #endif 2993 #if defined (PETSC_USE_CTABLE) 2994 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 2995 col = (col - 1)/bs; 2996 #else 2997 col = (baij->colmap[in[j]] - 1)/bs; 2998 #endif 2999 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3000 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3001 col = in[j]; 3002 } 3003 } 3004 else col = in[j]; 3005 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3006 } 3007 } 3008 } else { 3009 if (!baij->donotstash) { 3010 if (roworiented) { 3011 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3012 } else { 3013 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3014 } 3015 } 3016 } 3017 } 3018 3019 /* task normally handled by MatSetValuesBlocked() */ 3020 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3021 PetscFunctionReturn(0); 3022 } 3023