1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 19 { 20 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt i,*idxb = 0; 23 PetscScalar *va,*vb; 24 Vec vtmp; 25 26 PetscFunctionBegin; 27 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 28 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 29 if (idx) { 30 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 31 } 32 33 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 34 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 35 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 36 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 37 38 for (i=0; i<A->rmap->n; i++){ 39 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 40 } 41 42 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 43 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 44 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 45 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 46 PetscFunctionReturn(0); 47 } 48 49 EXTERN_C_BEGIN 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 53 { 54 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 59 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 60 PetscFunctionReturn(0); 61 } 62 EXTERN_C_END 63 64 EXTERN_C_BEGIN 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 68 { 69 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 74 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 EXTERN_C_END 78 79 /* 80 Local utility routine that creates a mapping from the global column 81 number to the local number in the off-diagonal part of the local 82 storage of the matrix. This is done in a non scable way since the 83 length of colmap equals the global matrix length. 84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined (PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++){ 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define CHUNKSIZE 10 110 111 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 112 { \ 113 \ 114 brow = row/bs; \ 115 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 116 rmax = aimax[brow]; nrow = ailen[brow]; \ 117 bcol = col/bs; \ 118 ridx = row % bs; cidx = col % bs; \ 119 low = 0; high = nrow; \ 120 while (high-low > 3) { \ 121 t = (low+high)/2; \ 122 if (rp[t] > bcol) high = t; \ 123 else low = t; \ 124 } \ 125 for (_i=low; _i<high; _i++) { \ 126 if (rp[_i] > bcol) break; \ 127 if (rp[_i] == bcol) { \ 128 bap = ap + bs2*_i + bs*cidx + ridx; \ 129 if (addv == ADD_VALUES) *bap += value; \ 130 else *bap = value; \ 131 goto a_noinsert; \ 132 } \ 133 } \ 134 if (a->nonew == 1) goto a_noinsert; \ 135 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 136 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 137 N = nrow++ - 1; \ 138 /* shift up all the later entries in this row */ \ 139 for (ii=N; ii>=_i; ii--) { \ 140 rp[ii+1] = rp[ii]; \ 141 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 142 } \ 143 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 144 rp[_i] = bcol; \ 145 ap[bs2*_i + bs*cidx + ridx] = value; \ 146 a_noinsert:; \ 147 ailen[brow] = nrow; \ 148 } 149 150 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 151 { \ 152 brow = row/bs; \ 153 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 154 rmax = bimax[brow]; nrow = bilen[brow]; \ 155 bcol = col/bs; \ 156 ridx = row % bs; cidx = col % bs; \ 157 low = 0; high = nrow; \ 158 while (high-low > 3) { \ 159 t = (low+high)/2; \ 160 if (rp[t] > bcol) high = t; \ 161 else low = t; \ 162 } \ 163 for (_i=low; _i<high; _i++) { \ 164 if (rp[_i] > bcol) break; \ 165 if (rp[_i] == bcol) { \ 166 bap = ap + bs2*_i + bs*cidx + ridx; \ 167 if (addv == ADD_VALUES) *bap += value; \ 168 else *bap = value; \ 169 goto b_noinsert; \ 170 } \ 171 } \ 172 if (b->nonew == 1) goto b_noinsert; \ 173 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 174 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 175 CHKMEMQ;\ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for (ii=N; ii>=_i; ii--) { \ 179 rp[ii+1] = rp[ii]; \ 180 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 181 } \ 182 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatSetValues_MPIBAIJ" 191 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 194 MatScalar value; 195 PetscTruth roworiented = baij->roworiented; 196 PetscErrorCode ierr; 197 PetscInt i,j,row,col; 198 PetscInt rstart_orig=mat->rmap->rstart; 199 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 200 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 201 202 /* Some Variables required in the macro */ 203 Mat A = baij->A; 204 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 205 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 206 MatScalar *aa=a->a; 207 208 Mat B = baij->B; 209 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 210 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 211 MatScalar *ba=b->a; 212 213 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 214 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 215 MatScalar *ap,*bap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_DEBUG) 221 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 222 #endif 223 if (im[i] >= rstart_orig && im[i] < rend_orig) { 224 row = im[i] - rstart_orig; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart_orig && in[j] < cend_orig){ 227 col = in[j] - cstart_orig; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } else if (in[j] < 0) continue; 232 #if defined(PETSC_USE_DEBUG) 233 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);} 234 #endif 235 else { 236 if (mat->was_assembled) { 237 if (!baij->colmap) { 238 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 239 } 240 #if defined (PETSC_USE_CTABLE) 241 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 242 col = col - 1; 243 #else 244 col = baij->colmap[in[j]/bs] - 1; 245 #endif 246 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 247 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 248 col = in[j]; 249 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 250 B = baij->B; 251 b = (Mat_SeqBAIJ*)(B)->data; 252 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 253 ba=b->a; 254 } else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (!baij->donotstash) { 263 if (roworiented) { 264 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 265 } else { 266 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 267 } 268 } 269 } 270 } 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 277 { 278 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 279 const PetscScalar *value; 280 MatScalar *barray=baij->barray; 281 PetscTruth roworiented = baij->roworiented; 282 PetscErrorCode ierr; 283 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 284 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 285 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 286 287 PetscFunctionBegin; 288 if(!barray) { 289 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 290 baij->barray = barray; 291 } 292 293 if (roworiented) { 294 stepval = (n-1)*bs; 295 } else { 296 stepval = (m-1)*bs; 297 } 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + i*(stepval+bs)*bs + j*bs; 314 } else { 315 value = v + j*(stepval+bs)*bs + i*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=stepval) { 318 for (jj=0; jj<bs; jj++) { 319 *barray++ = *value++; 320 } 321 } 322 barray -=bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend){ 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } 329 else if (in[j] < 0) continue; 330 #if defined(PETSC_USE_DEBUG) 331 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 332 #endif 333 else { 334 if (mat->was_assembled) { 335 if (!baij->colmap) { 336 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 337 } 338 339 #if defined(PETSC_USE_DEBUG) 340 #if defined (PETSC_USE_CTABLE) 341 { PetscInt data; 342 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 343 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 344 } 345 #else 346 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 347 #endif 348 #endif 349 #if defined (PETSC_USE_CTABLE) 350 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 351 col = (col - 1)/bs; 352 #else 353 col = (baij->colmap[in[j]] - 1)/bs; 354 #endif 355 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 356 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 357 col = in[j]; 358 } 359 } 360 else col = in[j]; 361 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 362 } 363 } 364 } else { 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 384 { 385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 386 PetscTruth roworiented = baij->roworiented; 387 PetscErrorCode ierr; 388 PetscInt i,j,row,col; 389 PetscInt rstart_orig=mat->rmap->rstart; 390 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 391 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 392 PetscReal tmp; 393 MatScalar **HD = baij->hd,value; 394 #if defined(PETSC_USE_DEBUG) 395 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 396 #endif 397 398 PetscFunctionBegin; 399 400 for (i=0; i<m; i++) { 401 #if defined(PETSC_USE_DEBUG) 402 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 403 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscTruth roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 584 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 662 tmp2 = tmp + mat->cmap->N; 663 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 664 v = amat->a; jj = amat->j; 665 for (i=0; i<amat->nz; i++) { 666 for (j=0; j<bs; j++){ 667 col = bs*(cstart + *jj) + j; /* column index */ 668 for (row=0; row<bs; row++){ 669 tmp[col] += PetscAbsScalar(*v); v++; 670 } 671 } 672 jj++; 673 } 674 v = bmat->a; jj = bmat->j; 675 for (i=0; i<bmat->nz; i++) { 676 for (j=0; j<bs; j++){ 677 col = bs*garray[*jj] + j; 678 for (row=0; row<bs; row++){ 679 tmp[col] += PetscAbsScalar(*v); v++; 680 } 681 } 682 jj++; 683 } 684 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 685 *nrm = 0.0; 686 for (j=0; j<mat->cmap->N; j++) { 687 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 688 } 689 ierr = PetscFree(tmp);CHKERRQ(ierr); 690 } else if (type == NORM_INFINITY) { /* max row sum */ 691 PetscReal *sums; 692 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 693 sum = 0.0; 694 for (j=0; j<amat->mbs; j++) { 695 for (row=0; row<bs; row++) sums[row] = 0.0; 696 v = amat->a + bs2*amat->i[j]; 697 nz = amat->i[j+1]-amat->i[j]; 698 for (i=0; i<nz; i++) { 699 for (col=0; col<bs; col++){ 700 for (row=0; row<bs; row++){ 701 sums[row] += PetscAbsScalar(*v); v++; 702 } 703 } 704 } 705 v = bmat->a + bs2*bmat->i[j]; 706 nz = bmat->i[j+1]-bmat->i[j]; 707 for (i=0; i<nz; i++) { 708 for (col=0; col<bs; col++){ 709 for (row=0; row<bs; row++){ 710 sums[row] += PetscAbsScalar(*v); v++; 711 } 712 } 713 } 714 for (row=0; row<bs; row++){ 715 if (sums[row] > sum) sum = sums[row]; 716 } 717 } 718 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 719 ierr = PetscFree(sums);CHKERRQ(ierr); 720 } else { 721 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 722 } 723 } 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Creates the hash table, and sets the table 729 This table is created only once. 730 If new entried need to be added to the matrix 731 then the hash table has to be destroyed and 732 recreated. 733 */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 737 { 738 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 739 Mat A = baij->A,B=baij->B; 740 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 741 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 742 PetscErrorCode ierr; 743 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 744 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 745 PetscInt *HT,key; 746 MatScalar **HD; 747 PetscReal tmp; 748 #if defined(PETSC_USE_INFO) 749 PetscInt ct=0,max=0; 750 #endif 751 752 PetscFunctionBegin; 753 baij->ht_size=(PetscInt)(factor*nz); 754 size = baij->ht_size; 755 756 if (baij->ht) { 757 PetscFunctionReturn(0); 758 } 759 760 /* Allocate Memory for Hash Table */ 761 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 762 baij->ht = (PetscInt*)(baij->hd + size); 763 HD = baij->hd; 764 HT = baij->ht; 765 766 767 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 768 769 770 /* Loop Over A */ 771 for (i=0; i<a->mbs; i++) { 772 for (j=ai[i]; j<ai[i+1]; j++) { 773 row = i+rstart; 774 col = aj[j]+cstart; 775 776 key = row*Nbs + col + 1; 777 h1 = HASH(size,key,tmp); 778 for (k=0; k<size; k++){ 779 if (!HT[(h1+k)%size]) { 780 HT[(h1+k)%size] = key; 781 HD[(h1+k)%size] = a->a + j*bs2; 782 break; 783 #if defined(PETSC_USE_INFO) 784 } else { 785 ct++; 786 #endif 787 } 788 } 789 #if defined(PETSC_USE_INFO) 790 if (k> max) max = k; 791 #endif 792 } 793 } 794 /* Loop Over B */ 795 for (i=0; i<b->mbs; i++) { 796 for (j=bi[i]; j<bi[i+1]; j++) { 797 row = i+rstart; 798 col = garray[bj[j]]; 799 key = row*Nbs + col + 1; 800 h1 = HASH(size,key,tmp); 801 for (k=0; k<size; k++){ 802 if (!HT[(h1+k)%size]) { 803 HT[(h1+k)%size] = key; 804 HD[(h1+k)%size] = b->a + j*bs2; 805 break; 806 #if defined(PETSC_USE_INFO) 807 } else { 808 ct++; 809 #endif 810 } 811 } 812 #if defined(PETSC_USE_INFO) 813 if (k> max) max = k; 814 #endif 815 } 816 } 817 818 /* Print Summary */ 819 #if defined(PETSC_USE_INFO) 820 for (i=0,j=0; i<size; i++) { 821 if (HT[i]) {j++;} 822 } 823 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 824 #endif 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNCT__ 829 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 830 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 831 { 832 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 833 PetscErrorCode ierr; 834 PetscInt nstash,reallocs; 835 InsertMode addv; 836 837 PetscFunctionBegin; 838 if (baij->donotstash) { 839 PetscFunctionReturn(0); 840 } 841 842 /* make sure all processors are either in INSERTMODE or ADDMODE */ 843 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 844 if (addv == (ADD_VALUES|INSERT_VALUES)) { 845 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 846 } 847 mat->insertmode = addv; /* in case this processor had no cache */ 848 849 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 850 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 851 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 852 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 853 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 854 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 860 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 861 { 862 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 863 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 864 PetscErrorCode ierr; 865 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 866 PetscInt *row,*col; 867 PetscTruth r1,r2,r3,other_disassembled; 868 MatScalar *val; 869 InsertMode addv = mat->insertmode; 870 PetscMPIInt n; 871 872 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 873 PetscFunctionBegin; 874 if (!baij->donotstash) { 875 while (1) { 876 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 877 if (!flg) break; 878 879 for (i=0; i<n;) { 880 /* Now identify the consecutive vals belonging to the same row */ 881 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 882 if (j < n) ncols = j-i; 883 else ncols = n-i; 884 /* Now assemble all these values with a single function call */ 885 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 886 i = j; 887 } 888 } 889 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 890 /* Now process the block-stash. Since the values are stashed column-oriented, 891 set the roworiented flag to column oriented, and after MatSetValues() 892 restore the original flags */ 893 r1 = baij->roworiented; 894 r2 = a->roworiented; 895 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 896 baij->roworiented = PETSC_FALSE; 897 a->roworiented = PETSC_FALSE; 898 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 899 while (1) { 900 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 901 if (!flg) break; 902 903 for (i=0; i<n;) { 904 /* Now identify the consecutive vals belonging to the same row */ 905 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 906 if (j < n) ncols = j-i; 907 else ncols = n-i; 908 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 909 i = j; 910 } 911 } 912 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 913 baij->roworiented = r1; 914 a->roworiented = r2; 915 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 916 } 917 918 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 919 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 920 921 /* determine if any processor has disassembled, if so we must 922 also disassemble ourselfs, in order that we may reassemble. */ 923 /* 924 if nonzero structure of submatrix B cannot change then we know that 925 no processor disassembled thus we can skip this stuff 926 */ 927 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 928 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 929 if (mat->was_assembled && !other_disassembled) { 930 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 931 } 932 } 933 934 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 935 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 936 } 937 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 938 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 939 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 940 941 #if defined(PETSC_USE_INFO) 942 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 943 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 944 baij->ht_total_ct = 0; 945 baij->ht_insert_ct = 0; 946 } 947 #endif 948 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 949 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 950 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 951 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 952 } 953 954 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 955 baij->rowvalues = 0; 956 PetscFunctionReturn(0); 957 } 958 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 961 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 962 { 963 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 964 PetscErrorCode ierr; 965 PetscMPIInt size = baij->size,rank = baij->rank; 966 PetscInt bs = mat->rmap->bs; 967 PetscTruth iascii,isdraw; 968 PetscViewer sviewer; 969 PetscViewerFormat format; 970 971 PetscFunctionBegin; 972 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 973 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 974 if (iascii) { 975 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 976 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 977 MatInfo info; 978 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 979 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 980 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 981 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 982 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 983 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 984 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 985 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 986 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 987 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 988 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 989 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } else if (format == PETSC_VIEWER_ASCII_INFO) { 992 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 995 PetscFunctionReturn(0); 996 } 997 } 998 999 if (isdraw) { 1000 PetscDraw draw; 1001 PetscTruth isnull; 1002 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1003 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1004 } 1005 1006 if (size == 1) { 1007 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1008 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1009 } else { 1010 /* assemble the entire matrix onto first processor. */ 1011 Mat A; 1012 Mat_SeqBAIJ *Aloc; 1013 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1014 MatScalar *a; 1015 1016 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1017 /* Perhaps this should be the type of mat? */ 1018 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1019 if (!rank) { 1020 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1021 } else { 1022 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1023 } 1024 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1025 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1026 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1027 1028 /* copy over the A part */ 1029 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1030 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1031 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1032 1033 for (i=0; i<mbs; i++) { 1034 rvals[0] = bs*(baij->rstartbs + i); 1035 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1036 for (j=ai[i]; j<ai[i+1]; j++) { 1037 col = (baij->cstartbs+aj[j])*bs; 1038 for (k=0; k<bs; k++) { 1039 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1040 col++; a += bs; 1041 } 1042 } 1043 } 1044 /* copy over the B part */ 1045 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1046 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1047 for (i=0; i<mbs; i++) { 1048 rvals[0] = bs*(baij->rstartbs + i); 1049 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1050 for (j=ai[i]; j<ai[i+1]; j++) { 1051 col = baij->garray[aj[j]]*bs; 1052 for (k=0; k<bs; k++) { 1053 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1054 col++; a += bs; 1055 } 1056 } 1057 } 1058 ierr = PetscFree(rvals);CHKERRQ(ierr); 1059 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1060 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1061 /* 1062 Everyone has to call to draw the matrix since the graphics waits are 1063 synchronized across all processors that share the PetscDraw object 1064 */ 1065 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1066 if (!rank) { 1067 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1068 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1069 } 1070 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1071 ierr = MatDestroy(A);CHKERRQ(ierr); 1072 } 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatView_MPIBAIJ" 1078 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1079 { 1080 PetscErrorCode ierr; 1081 PetscTruth iascii,isdraw,issocket,isbinary; 1082 1083 PetscFunctionBegin; 1084 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1085 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1086 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1087 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1088 if (iascii || isdraw || issocket || isbinary) { 1089 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1090 } else { 1091 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1098 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1099 { 1100 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 #if defined(PETSC_USE_LOG) 1105 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1106 #endif 1107 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1108 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1109 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1110 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1111 #if defined (PETSC_USE_CTABLE) 1112 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1113 #else 1114 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1115 #endif 1116 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1117 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1118 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1119 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1120 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1121 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1122 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1123 ierr = PetscFree(baij);CHKERRQ(ierr); 1124 1125 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatMult_MPIBAIJ" 1138 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1139 { 1140 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1141 PetscErrorCode ierr; 1142 PetscInt nt; 1143 1144 PetscFunctionBegin; 1145 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1146 if (nt != A->cmap->n) { 1147 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1148 } 1149 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1150 if (nt != A->rmap->n) { 1151 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1152 } 1153 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1154 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1155 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1156 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1162 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1163 { 1164 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1165 PetscErrorCode ierr; 1166 1167 PetscFunctionBegin; 1168 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1170 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1171 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1177 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1178 { 1179 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1180 PetscErrorCode ierr; 1181 PetscTruth merged; 1182 1183 PetscFunctionBegin; 1184 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1185 /* do nondiagonal part */ 1186 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1187 if (!merged) { 1188 /* send it on its way */ 1189 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1190 /* do local part */ 1191 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1192 /* receive remote parts: note this assumes the values are not actually */ 1193 /* inserted in yy until the next line */ 1194 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1195 } else { 1196 /* do local part */ 1197 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1198 /* send it on its way */ 1199 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1200 /* values actually were received in the Begin() but we need to call this nop */ 1201 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1202 } 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1208 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1209 { 1210 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1211 PetscErrorCode ierr; 1212 1213 PetscFunctionBegin; 1214 /* do nondiagonal part */ 1215 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1216 /* send it on its way */ 1217 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1218 /* do local part */ 1219 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1220 /* receive remote parts: note this assumes the values are not actually */ 1221 /* inserted in yy until the next line, which is true for my implementation*/ 1222 /* but is not perhaps always true. */ 1223 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 /* 1228 This only works correctly for square matrices where the subblock A->A is the 1229 diagonal block 1230 */ 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1233 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1234 { 1235 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1240 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatScale_MPIBAIJ" 1246 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1247 { 1248 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1253 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1259 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1260 { 1261 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1262 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1263 PetscErrorCode ierr; 1264 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1265 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1266 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1267 1268 PetscFunctionBegin; 1269 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1270 mat->getrowactive = PETSC_TRUE; 1271 1272 if (!mat->rowvalues && (idx || v)) { 1273 /* 1274 allocate enough space to hold information from the longest row. 1275 */ 1276 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1277 PetscInt max = 1,mbs = mat->mbs,tmp; 1278 for (i=0; i<mbs; i++) { 1279 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1280 if (max < tmp) { max = tmp; } 1281 } 1282 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1283 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1284 } 1285 1286 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1287 lrow = row - brstart; 1288 1289 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1290 if (!v) {pvA = 0; pvB = 0;} 1291 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1292 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1293 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1294 nztot = nzA + nzB; 1295 1296 cmap = mat->garray; 1297 if (v || idx) { 1298 if (nztot) { 1299 /* Sort by increasing column numbers, assuming A and B already sorted */ 1300 PetscInt imark = -1; 1301 if (v) { 1302 *v = v_p = mat->rowvalues; 1303 for (i=0; i<nzB; i++) { 1304 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1305 else break; 1306 } 1307 imark = i; 1308 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1309 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1310 } 1311 if (idx) { 1312 *idx = idx_p = mat->rowindices; 1313 if (imark > -1) { 1314 for (i=0; i<imark; i++) { 1315 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1316 } 1317 } else { 1318 for (i=0; i<nzB; i++) { 1319 if (cmap[cworkB[i]/bs] < cstart) 1320 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1321 else break; 1322 } 1323 imark = i; 1324 } 1325 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1326 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1327 } 1328 } else { 1329 if (idx) *idx = 0; 1330 if (v) *v = 0; 1331 } 1332 } 1333 *nz = nztot; 1334 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1341 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1344 1345 PetscFunctionBegin; 1346 if (!baij->getrowactive) { 1347 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1348 } 1349 baij->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1355 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1356 { 1357 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1362 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1368 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1369 { 1370 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1371 Mat A = a->A,B = a->B; 1372 PetscErrorCode ierr; 1373 PetscReal isend[5],irecv[5]; 1374 1375 PetscFunctionBegin; 1376 info->block_size = (PetscReal)matin->rmap->bs; 1377 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1378 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1379 isend[3] = info->memory; isend[4] = info->mallocs; 1380 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1381 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1382 isend[3] += info->memory; isend[4] += info->mallocs; 1383 if (flag == MAT_LOCAL) { 1384 info->nz_used = isend[0]; 1385 info->nz_allocated = isend[1]; 1386 info->nz_unneeded = isend[2]; 1387 info->memory = isend[3]; 1388 info->mallocs = isend[4]; 1389 } else if (flag == MAT_GLOBAL_MAX) { 1390 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1391 info->nz_used = irecv[0]; 1392 info->nz_allocated = irecv[1]; 1393 info->nz_unneeded = irecv[2]; 1394 info->memory = irecv[3]; 1395 info->mallocs = irecv[4]; 1396 } else if (flag == MAT_GLOBAL_SUM) { 1397 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1398 info->nz_used = irecv[0]; 1399 info->nz_allocated = irecv[1]; 1400 info->nz_unneeded = irecv[2]; 1401 info->memory = irecv[3]; 1402 info->mallocs = irecv[4]; 1403 } else { 1404 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1405 } 1406 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1407 info->fill_ratio_needed = 0; 1408 info->factor_mallocs = 0; 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1414 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1415 { 1416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 switch (op) { 1421 case MAT_NEW_NONZERO_LOCATIONS: 1422 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1423 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1424 case MAT_KEEP_NONZERO_PATTERN: 1425 case MAT_NEW_NONZERO_LOCATION_ERR: 1426 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1427 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1428 break; 1429 case MAT_ROW_ORIENTED: 1430 a->roworiented = flg; 1431 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1432 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1433 break; 1434 case MAT_NEW_DIAGONALS: 1435 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1436 break; 1437 case MAT_IGNORE_OFF_PROC_ENTRIES: 1438 a->donotstash = flg; 1439 break; 1440 case MAT_USE_HASH_TABLE: 1441 a->ht_flag = flg; 1442 break; 1443 case MAT_SYMMETRIC: 1444 case MAT_STRUCTURALLY_SYMMETRIC: 1445 case MAT_HERMITIAN: 1446 case MAT_SYMMETRY_ETERNAL: 1447 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1448 break; 1449 default: 1450 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1457 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1458 { 1459 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1460 Mat_SeqBAIJ *Aloc; 1461 Mat B; 1462 PetscErrorCode ierr; 1463 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1464 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1465 MatScalar *a; 1466 1467 PetscFunctionBegin; 1468 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1469 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1470 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1471 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1472 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1473 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1474 } else { 1475 B = *matout; 1476 } 1477 1478 /* copy over the A part */ 1479 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1480 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1481 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1482 1483 for (i=0; i<mbs; i++) { 1484 rvals[0] = bs*(baij->rstartbs + i); 1485 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1486 for (j=ai[i]; j<ai[i+1]; j++) { 1487 col = (baij->cstartbs+aj[j])*bs; 1488 for (k=0; k<bs; k++) { 1489 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1490 col++; a += bs; 1491 } 1492 } 1493 } 1494 /* copy over the B part */ 1495 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1496 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1497 for (i=0; i<mbs; i++) { 1498 rvals[0] = bs*(baij->rstartbs + i); 1499 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1500 for (j=ai[i]; j<ai[i+1]; j++) { 1501 col = baij->garray[aj[j]]*bs; 1502 for (k=0; k<bs; k++) { 1503 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1504 col++; a += bs; 1505 } 1506 } 1507 } 1508 ierr = PetscFree(rvals);CHKERRQ(ierr); 1509 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1510 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1511 1512 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1513 *matout = B; 1514 } else { 1515 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1522 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1523 { 1524 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1525 Mat a = baij->A,b = baij->B; 1526 PetscErrorCode ierr; 1527 PetscInt s1,s2,s3; 1528 1529 PetscFunctionBegin; 1530 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1531 if (rr) { 1532 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1533 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1534 /* Overlap communication with computation. */ 1535 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1536 } 1537 if (ll) { 1538 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1539 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1540 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1541 } 1542 /* scale the diagonal block */ 1543 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1544 1545 if (rr) { 1546 /* Do a scatter end and then right scale the off-diagonal block */ 1547 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1549 } 1550 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1556 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1557 { 1558 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1559 PetscErrorCode ierr; 1560 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1561 PetscInt i,*owners = A->rmap->range; 1562 PetscInt *nprocs,j,idx,nsends,row; 1563 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1564 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1565 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1566 MPI_Comm comm = ((PetscObject)A)->comm; 1567 MPI_Request *send_waits,*recv_waits; 1568 MPI_Status recv_status,*send_status; 1569 #if defined(PETSC_DEBUG) 1570 PetscTruth found = PETSC_FALSE; 1571 #endif 1572 1573 PetscFunctionBegin; 1574 /* first count number of contributors to each processor */ 1575 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1576 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1577 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1578 j = 0; 1579 for (i=0; i<N; i++) { 1580 if (lastidx > (idx = rows[i])) j = 0; 1581 lastidx = idx; 1582 for (; j<size; j++) { 1583 if (idx >= owners[j] && idx < owners[j+1]) { 1584 nprocs[2*j]++; 1585 nprocs[2*j+1] = 1; 1586 owner[i] = j; 1587 #if defined(PETSC_DEBUG) 1588 found = PETSC_TRUE; 1589 #endif 1590 break; 1591 } 1592 } 1593 #if defined(PETSC_DEBUG) 1594 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1595 found = PETSC_FALSE; 1596 #endif 1597 } 1598 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1599 1600 /* inform other processors of number of messages and max length*/ 1601 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1602 1603 /* post receives: */ 1604 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1605 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1606 for (i=0; i<nrecvs; i++) { 1607 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1608 } 1609 1610 /* do sends: 1611 1) starts[i] gives the starting index in svalues for stuff going to 1612 the ith processor 1613 */ 1614 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1615 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1616 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1617 starts[0] = 0; 1618 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1619 for (i=0; i<N; i++) { 1620 svalues[starts[owner[i]]++] = rows[i]; 1621 } 1622 1623 starts[0] = 0; 1624 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1625 count = 0; 1626 for (i=0; i<size; i++) { 1627 if (nprocs[2*i+1]) { 1628 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1629 } 1630 } 1631 ierr = PetscFree(starts);CHKERRQ(ierr); 1632 1633 base = owners[rank]; 1634 1635 /* wait on receives */ 1636 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1637 source = lens + nrecvs; 1638 count = nrecvs; slen = 0; 1639 while (count) { 1640 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1641 /* unpack receives into our local space */ 1642 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1643 source[imdex] = recv_status.MPI_SOURCE; 1644 lens[imdex] = n; 1645 slen += n; 1646 count--; 1647 } 1648 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1649 1650 /* move the data into the send scatter */ 1651 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1652 count = 0; 1653 for (i=0; i<nrecvs; i++) { 1654 values = rvalues + i*nmax; 1655 for (j=0; j<lens[i]; j++) { 1656 lrows[count++] = values[j] - base; 1657 } 1658 } 1659 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1660 ierr = PetscFree(lens);CHKERRQ(ierr); 1661 ierr = PetscFree(owner);CHKERRQ(ierr); 1662 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1663 1664 /* actually zap the local rows */ 1665 /* 1666 Zero the required rows. If the "diagonal block" of the matrix 1667 is square and the user wishes to set the diagonal we use separate 1668 code so that MatSetValues() is not called for each diagonal allocating 1669 new memory, thus calling lots of mallocs and slowing things down. 1670 1671 */ 1672 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1673 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1674 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1675 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1676 } else if (diag != 0.0) { 1677 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1678 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1679 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1680 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1681 } 1682 for (i=0; i<slen; i++) { 1683 row = lrows[i] + rstart_bs; 1684 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1685 } 1686 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 } else { 1689 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1690 } 1691 1692 ierr = PetscFree(lrows);CHKERRQ(ierr); 1693 1694 /* wait on sends */ 1695 if (nsends) { 1696 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1697 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1698 ierr = PetscFree(send_status);CHKERRQ(ierr); 1699 } 1700 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1701 ierr = PetscFree(svalues);CHKERRQ(ierr); 1702 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1708 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1709 { 1710 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1711 PetscErrorCode ierr; 1712 1713 PetscFunctionBegin; 1714 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1715 PetscFunctionReturn(0); 1716 } 1717 1718 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatEqual_MPIBAIJ" 1722 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1723 { 1724 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1725 Mat a,b,c,d; 1726 PetscTruth flg; 1727 PetscErrorCode ierr; 1728 1729 PetscFunctionBegin; 1730 a = matA->A; b = matA->B; 1731 c = matB->A; d = matB->B; 1732 1733 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1734 if (flg) { 1735 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1736 } 1737 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatCopy_MPIBAIJ" 1743 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1744 { 1745 PetscErrorCode ierr; 1746 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1747 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1748 1749 PetscFunctionBegin; 1750 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1751 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1752 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1753 } else { 1754 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1755 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1762 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1763 { 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #include "petscblaslapack.h" 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1774 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1775 { 1776 PetscErrorCode ierr; 1777 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1778 PetscBLASInt bnz,one=1; 1779 Mat_SeqBAIJ *x,*y; 1780 1781 PetscFunctionBegin; 1782 if (str == SAME_NONZERO_PATTERN) { 1783 PetscScalar alpha = a; 1784 x = (Mat_SeqBAIJ *)xx->A->data; 1785 y = (Mat_SeqBAIJ *)yy->A->data; 1786 bnz = PetscBLASIntCast(x->nz); 1787 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1788 x = (Mat_SeqBAIJ *)xx->B->data; 1789 y = (Mat_SeqBAIJ *)yy->B->data; 1790 bnz = PetscBLASIntCast(x->nz); 1791 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1792 } else { 1793 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1794 } 1795 PetscFunctionReturn(0); 1796 } 1797 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1800 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1801 { 1802 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1803 PetscErrorCode ierr; 1804 1805 PetscFunctionBegin; 1806 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1807 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1813 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1814 { 1815 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1816 PetscErrorCode ierr; 1817 1818 PetscFunctionBegin; 1819 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1820 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1826 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1827 { 1828 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1829 PetscErrorCode ierr; 1830 1831 PetscFunctionBegin; 1832 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1833 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1839 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1840 { 1841 PetscErrorCode ierr; 1842 IS iscol_local; 1843 PetscInt csize; 1844 1845 PetscFunctionBegin; 1846 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1847 if (call == MAT_REUSE_MATRIX) { 1848 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1849 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1850 } else { 1851 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1852 } 1853 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1854 if (call == MAT_INITIAL_MATRIX) { 1855 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1856 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1863 /* 1864 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1865 in local and then by concatenating the local matrices the end result. 1866 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1867 */ 1868 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1869 { 1870 PetscErrorCode ierr; 1871 PetscMPIInt rank,size; 1872 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1873 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1874 Mat *local,M,Mreuse; 1875 MatScalar *vwork,*aa; 1876 MPI_Comm comm = ((PetscObject)mat)->comm; 1877 Mat_SeqBAIJ *aij; 1878 1879 1880 PetscFunctionBegin; 1881 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1882 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1883 1884 if (call == MAT_REUSE_MATRIX) { 1885 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1886 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1887 local = &Mreuse; 1888 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1889 } else { 1890 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1891 Mreuse = *local; 1892 ierr = PetscFree(local);CHKERRQ(ierr); 1893 } 1894 1895 /* 1896 m - number of local rows 1897 n - number of columns (same on all processors) 1898 rstart - first row in new global matrix generated 1899 */ 1900 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1901 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1902 m = m/bs; 1903 n = n/bs; 1904 1905 if (call == MAT_INITIAL_MATRIX) { 1906 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1907 ii = aij->i; 1908 jj = aij->j; 1909 1910 /* 1911 Determine the number of non-zeros in the diagonal and off-diagonal 1912 portions of the matrix in order to do correct preallocation 1913 */ 1914 1915 /* first get start and end of "diagonal" columns */ 1916 if (csize == PETSC_DECIDE) { 1917 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1918 if (mglobal == n*bs) { /* square matrix */ 1919 nlocal = m; 1920 } else { 1921 nlocal = n/size + ((n % size) > rank); 1922 } 1923 } else { 1924 nlocal = csize/bs; 1925 } 1926 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1927 rstart = rend - nlocal; 1928 if (rank == size - 1 && rend != n) { 1929 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1930 } 1931 1932 /* next, compute all the lengths */ 1933 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1934 olens = dlens + m; 1935 for (i=0; i<m; i++) { 1936 jend = ii[i+1] - ii[i]; 1937 olen = 0; 1938 dlen = 0; 1939 for (j=0; j<jend; j++) { 1940 if (*jj < rstart || *jj >= rend) olen++; 1941 else dlen++; 1942 jj++; 1943 } 1944 olens[i] = olen; 1945 dlens[i] = dlen; 1946 } 1947 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1948 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1949 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1950 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1951 ierr = PetscFree(dlens);CHKERRQ(ierr); 1952 } else { 1953 PetscInt ml,nl; 1954 1955 M = *newmat; 1956 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1957 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1958 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1959 /* 1960 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1961 rather than the slower MatSetValues(). 1962 */ 1963 M->was_assembled = PETSC_TRUE; 1964 M->assembled = PETSC_FALSE; 1965 } 1966 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1967 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1968 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1969 ii = aij->i; 1970 jj = aij->j; 1971 aa = aij->a; 1972 for (i=0; i<m; i++) { 1973 row = rstart/bs + i; 1974 nz = ii[i+1] - ii[i]; 1975 cwork = jj; jj += nz; 1976 vwork = aa; aa += nz; 1977 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1978 } 1979 1980 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1981 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1982 *newmat = M; 1983 1984 /* save submatrix used in processor for next request */ 1985 if (call == MAT_INITIAL_MATRIX) { 1986 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1987 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1988 } 1989 1990 PetscFunctionReturn(0); 1991 } 1992 1993 #undef __FUNCT__ 1994 #define __FUNCT__ "MatPermute_MPIBAIJ" 1995 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1996 { 1997 MPI_Comm comm,pcomm; 1998 PetscInt first,local_size,nrows; 1999 const PetscInt *rows; 2000 PetscMPIInt size; 2001 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2002 PetscErrorCode ierr; 2003 2004 PetscFunctionBegin; 2005 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2006 /* make a collective version of 'rowp' */ 2007 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2008 if (pcomm==comm) { 2009 crowp = rowp; 2010 } else { 2011 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2012 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2013 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2014 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2015 } 2016 /* collect the global row permutation and invert it */ 2017 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2018 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2019 if (pcomm!=comm) { 2020 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2021 } 2022 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2023 /* get the local target indices */ 2024 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2025 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2026 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2027 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2028 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2029 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2030 /* the column permutation is so much easier; 2031 make a local version of 'colp' and invert it */ 2032 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2033 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2034 if (size==1) { 2035 lcolp = colp; 2036 } else { 2037 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2038 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2039 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2040 } 2041 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2042 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2043 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2044 if (size>1) { 2045 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2046 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2047 } 2048 /* now we just get the submatrix */ 2049 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2050 /* clean up */ 2051 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2052 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2058 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2059 { 2060 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2061 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2062 2063 PetscFunctionBegin; 2064 if (nghosts) { *nghosts = B->nbs;} 2065 if (ghosts) {*ghosts = baij->garray;} 2066 PetscFunctionReturn(0); 2067 } 2068 2069 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2070 2071 #undef __FUNCT__ 2072 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2073 /* 2074 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2075 */ 2076 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2077 { 2078 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2079 PetscErrorCode ierr; 2080 PetscMPIInt size,*ncolsonproc,*disp,nn; 2081 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2082 const PetscInt *is; 2083 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2084 PetscInt *rowhit,M,cstart,cend,colb; 2085 PetscInt *columnsforrow,l; 2086 IS *isa; 2087 PetscTruth done,flg; 2088 ISLocalToGlobalMapping map = mat->bmapping; 2089 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2090 2091 PetscFunctionBegin; 2092 if (!mat->assembled) { 2093 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2094 } 2095 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2096 2097 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2098 2099 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2100 M = mat->rmap->n/bs; 2101 cstart = mat->cmap->rstart/bs; 2102 cend = mat->cmap->rend/bs; 2103 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2104 c->N = mat->cmap->N/bs; 2105 c->m = mat->rmap->n/bs; 2106 c->rstart = mat->rmap->rstart/bs; 2107 2108 c->ncolors = nis; 2109 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2110 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2111 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2112 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2113 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2114 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2115 2116 /* Allow access to data structures of local part of matrix */ 2117 if (!baij->colmap) { 2118 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2119 } 2120 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2121 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2122 2123 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2124 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2125 2126 for (i=0; i<nis; i++) { 2127 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2128 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2129 c->ncolumns[i] = n; 2130 if (n) { 2131 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2132 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2133 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2134 } else { 2135 c->columns[i] = 0; 2136 } 2137 2138 if (ctype == IS_COLORING_GLOBAL){ 2139 /* Determine the total (parallel) number of columns of this color */ 2140 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2141 ierr = PetscMalloc(2*size*sizeof(PetscInt*),&ncolsonproc);CHKERRQ(ierr); 2142 disp = ncolsonproc + size; 2143 2144 nn = PetscMPIIntCast(n); 2145 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2146 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2147 if (!nctot) { 2148 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2149 } 2150 2151 disp[0] = 0; 2152 for (j=1; j<size; j++) { 2153 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2154 } 2155 2156 /* Get complete list of columns for color on each processor */ 2157 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2158 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2159 ierr = PetscFree(ncolsonproc);CHKERRQ(ierr); 2160 } else if (ctype == IS_COLORING_GHOSTED){ 2161 /* Determine local number of columns of this color on this process, including ghost points */ 2162 nctot = n; 2163 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2164 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2165 } else { 2166 SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2167 } 2168 2169 /* 2170 Mark all rows affect by these columns 2171 */ 2172 /* Temporary option to allow for debugging/testing */ 2173 flg = PETSC_FALSE; 2174 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2175 if (!flg) {/*-----------------------------------------------------------------------------*/ 2176 /* crude, fast version */ 2177 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2178 /* loop over columns*/ 2179 for (j=0; j<nctot; j++) { 2180 if (ctype == IS_COLORING_GHOSTED) { 2181 col = ltog[cols[j]]; 2182 } else { 2183 col = cols[j]; 2184 } 2185 if (col >= cstart && col < cend) { 2186 /* column is in diagonal block of matrix */ 2187 rows = A_cj + A_ci[col-cstart]; 2188 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2189 } else { 2190 #if defined (PETSC_USE_CTABLE) 2191 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr) 2192 colb --; 2193 #else 2194 colb = baij->colmap[col] - 1; 2195 #endif 2196 if (colb == -1) { 2197 m = 0; 2198 } else { 2199 colb = colb/bs; 2200 rows = B_cj + B_ci[colb]; 2201 m = B_ci[colb+1] - B_ci[colb]; 2202 } 2203 } 2204 /* loop over columns marking them in rowhit */ 2205 for (k=0; k<m; k++) { 2206 rowhit[*rows++] = col + 1; 2207 } 2208 } 2209 2210 /* count the number of hits */ 2211 nrows = 0; 2212 for (j=0; j<M; j++) { 2213 if (rowhit[j]) nrows++; 2214 } 2215 c->nrows[i] = nrows; 2216 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2217 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2218 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2219 nrows = 0; 2220 for (j=0; j<M; j++) { 2221 if (rowhit[j]) { 2222 c->rows[i][nrows] = j; 2223 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2224 nrows++; 2225 } 2226 } 2227 } else {/*-------------------------------------------------------------------------------*/ 2228 /* slow version, using rowhit as a linked list */ 2229 PetscInt currentcol,fm,mfm; 2230 rowhit[M] = M; 2231 nrows = 0; 2232 /* loop over columns*/ 2233 for (j=0; j<nctot; j++) { 2234 if (ctype == IS_COLORING_GHOSTED) { 2235 col = ltog[cols[j]]; 2236 } else { 2237 col = cols[j]; 2238 } 2239 if (col >= cstart && col < cend) { 2240 /* column is in diagonal block of matrix */ 2241 rows = A_cj + A_ci[col-cstart]; 2242 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2243 } else { 2244 #if defined (PETSC_USE_CTABLE) 2245 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2246 colb --; 2247 #else 2248 colb = baij->colmap[col] - 1; 2249 #endif 2250 if (colb == -1) { 2251 m = 0; 2252 } else { 2253 colb = colb/bs; 2254 rows = B_cj + B_ci[colb]; 2255 m = B_ci[colb+1] - B_ci[colb]; 2256 } 2257 } 2258 2259 /* loop over columns marking them in rowhit */ 2260 fm = M; /* fm points to first entry in linked list */ 2261 for (k=0; k<m; k++) { 2262 currentcol = *rows++; 2263 /* is it already in the list? */ 2264 do { 2265 mfm = fm; 2266 fm = rowhit[fm]; 2267 } while (fm < currentcol); 2268 /* not in list so add it */ 2269 if (fm != currentcol) { 2270 nrows++; 2271 columnsforrow[currentcol] = col; 2272 /* next three lines insert new entry into linked list */ 2273 rowhit[mfm] = currentcol; 2274 rowhit[currentcol] = fm; 2275 fm = currentcol; 2276 /* fm points to present position in list since we know the columns are sorted */ 2277 } else { 2278 SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2279 } 2280 } 2281 } 2282 c->nrows[i] = nrows; 2283 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2284 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2285 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2286 /* now store the linked list of rows into c->rows[i] */ 2287 nrows = 0; 2288 fm = rowhit[M]; 2289 do { 2290 c->rows[i][nrows] = fm; 2291 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2292 fm = rowhit[fm]; 2293 } while (fm < M); 2294 } /* ---------------------------------------------------------------------------------------*/ 2295 ierr = PetscFree(cols);CHKERRQ(ierr); 2296 } 2297 2298 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2299 /* 2300 vscale will contain the "diagonal" on processor scalings followed by the off processor 2301 */ 2302 if (ctype == IS_COLORING_GLOBAL) { 2303 PetscInt *garray; 2304 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2305 for (i=0; i<baij->B->cmap->n/bs; i++) { 2306 for (j=0; j<bs; j++) { 2307 garray[i*bs+j] = bs*baij->garray[i]+j; 2308 } 2309 } 2310 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2311 ierr = PetscFree(garray);CHKERRQ(ierr); 2312 CHKMEMQ; 2313 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2314 for (k=0; k<c->ncolors; k++) { 2315 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2316 for (l=0; l<c->nrows[k]; l++) { 2317 col = c->columnsforrow[k][l]; 2318 if (col >= cstart && col < cend) { 2319 /* column is in diagonal block of matrix */ 2320 colb = col - cstart; 2321 } else { 2322 /* column is in "off-processor" part */ 2323 #if defined (PETSC_USE_CTABLE) 2324 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2325 colb --; 2326 #else 2327 colb = baij->colmap[col] - 1; 2328 #endif 2329 colb = colb/bs; 2330 colb += cend - cstart; 2331 } 2332 c->vscaleforrow[k][l] = colb; 2333 } 2334 } 2335 } else if (ctype == IS_COLORING_GHOSTED) { 2336 /* Get gtol mapping */ 2337 PetscInt N = mat->cmap->N, *gtol; 2338 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2339 for (i=0; i<N; i++) gtol[i] = -1; 2340 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2341 2342 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2343 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2344 for (k=0; k<c->ncolors; k++) { 2345 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2346 for (l=0; l<c->nrows[k]; l++) { 2347 col = c->columnsforrow[k][l]; /* global column index */ 2348 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2349 } 2350 } 2351 ierr = PetscFree(gtol);CHKERRQ(ierr); 2352 } 2353 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2354 2355 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2356 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2357 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2358 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2359 CHKMEMQ; 2360 PetscFunctionReturn(0); 2361 } 2362 2363 #undef __FUNCT__ 2364 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2365 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2366 { 2367 Mat B; 2368 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2369 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2370 Mat_SeqAIJ *b; 2371 PetscErrorCode ierr; 2372 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2373 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2374 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2375 2376 PetscFunctionBegin; 2377 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2378 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2379 2380 /* ---------------------------------------------------------------- 2381 Tell every processor the number of nonzeros per row 2382 */ 2383 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2384 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2385 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2386 } 2387 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2388 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2389 displs = recvcounts + size; 2390 for (i=0; i<size; i++) { 2391 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2392 displs[i] = A->rmap->range[i]/bs; 2393 } 2394 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2395 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2396 #else 2397 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2398 #endif 2399 /* --------------------------------------------------------------- 2400 Create the sequential matrix of the same type as the local block diagonal 2401 */ 2402 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2403 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2404 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2405 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2406 b = (Mat_SeqAIJ *)B->data; 2407 2408 /*-------------------------------------------------------------------- 2409 Copy my part of matrix column indices over 2410 */ 2411 sendcount = ad->nz + bd->nz; 2412 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2413 a_jsendbuf = ad->j; 2414 b_jsendbuf = bd->j; 2415 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2416 cnt = 0; 2417 for (i=0; i<n; i++) { 2418 2419 /* put in lower diagonal portion */ 2420 m = bd->i[i+1] - bd->i[i]; 2421 while (m > 0) { 2422 /* is it above diagonal (in bd (compressed) numbering) */ 2423 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2424 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2425 m--; 2426 } 2427 2428 /* put in diagonal portion */ 2429 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2430 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2431 } 2432 2433 /* put in upper diagonal portion */ 2434 while (m-- > 0) { 2435 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2436 } 2437 } 2438 if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2439 2440 /*-------------------------------------------------------------------- 2441 Gather all column indices to all processors 2442 */ 2443 for (i=0; i<size; i++) { 2444 recvcounts[i] = 0; 2445 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2446 recvcounts[i] += lens[j]; 2447 } 2448 } 2449 displs[0] = 0; 2450 for (i=1; i<size; i++) { 2451 displs[i] = displs[i-1] + recvcounts[i-1]; 2452 } 2453 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2454 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2455 #else 2456 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2457 #endif 2458 /*-------------------------------------------------------------------- 2459 Assemble the matrix into useable form (note numerical values not yet set) 2460 */ 2461 /* set the b->ilen (length of each row) values */ 2462 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2463 /* set the b->i indices */ 2464 b->i[0] = 0; 2465 for (i=1; i<=A->rmap->N/bs; i++) { 2466 b->i[i] = b->i[i-1] + lens[i-1]; 2467 } 2468 ierr = PetscFree(lens);CHKERRQ(ierr); 2469 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2470 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2471 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2472 2473 if (A->symmetric){ 2474 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2475 } else if (A->hermitian) { 2476 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2477 } else if (A->structurally_symmetric) { 2478 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2479 } 2480 *newmat = B; 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatSOR_MPIBAIJ" 2486 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2487 { 2488 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2489 PetscErrorCode ierr; 2490 Vec bb1 = 0; 2491 2492 PetscFunctionBegin; 2493 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2494 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2495 } 2496 2497 if (flag == SOR_APPLY_UPPER) { 2498 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2499 PetscFunctionReturn(0); 2500 } 2501 2502 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2503 if (flag & SOR_ZERO_INITIAL_GUESS) { 2504 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2505 its--; 2506 } 2507 2508 while (its--) { 2509 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2511 2512 /* update rhs: bb1 = bb - B*x */ 2513 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2514 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2515 2516 /* local sweep */ 2517 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2518 } 2519 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2520 if (flag & SOR_ZERO_INITIAL_GUESS) { 2521 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2522 its--; 2523 } 2524 while (its--) { 2525 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2526 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2527 2528 /* update rhs: bb1 = bb - B*x */ 2529 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2530 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2531 2532 /* local sweep */ 2533 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2534 } 2535 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2536 if (flag & SOR_ZERO_INITIAL_GUESS) { 2537 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2538 its--; 2539 } 2540 while (its--) { 2541 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2542 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2543 2544 /* update rhs: bb1 = bb - B*x */ 2545 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2546 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2547 2548 /* local sweep */ 2549 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2550 } 2551 } else { 2552 SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2553 } 2554 2555 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2556 PetscFunctionReturn(0); 2557 } 2558 2559 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2560 2561 2562 /* -------------------------------------------------------------------*/ 2563 static struct _MatOps MatOps_Values = { 2564 MatSetValues_MPIBAIJ, 2565 MatGetRow_MPIBAIJ, 2566 MatRestoreRow_MPIBAIJ, 2567 MatMult_MPIBAIJ, 2568 /* 4*/ MatMultAdd_MPIBAIJ, 2569 MatMultTranspose_MPIBAIJ, 2570 MatMultTransposeAdd_MPIBAIJ, 2571 0, 2572 0, 2573 0, 2574 /*10*/ 0, 2575 0, 2576 0, 2577 MatSOR_MPIBAIJ, 2578 MatTranspose_MPIBAIJ, 2579 /*15*/ MatGetInfo_MPIBAIJ, 2580 MatEqual_MPIBAIJ, 2581 MatGetDiagonal_MPIBAIJ, 2582 MatDiagonalScale_MPIBAIJ, 2583 MatNorm_MPIBAIJ, 2584 /*20*/ MatAssemblyBegin_MPIBAIJ, 2585 MatAssemblyEnd_MPIBAIJ, 2586 MatSetOption_MPIBAIJ, 2587 MatZeroEntries_MPIBAIJ, 2588 /*24*/ MatZeroRows_MPIBAIJ, 2589 0, 2590 0, 2591 0, 2592 0, 2593 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2594 0, 2595 0, 2596 0, 2597 0, 2598 /*34*/ MatDuplicate_MPIBAIJ, 2599 0, 2600 0, 2601 0, 2602 0, 2603 /*39*/ MatAXPY_MPIBAIJ, 2604 MatGetSubMatrices_MPIBAIJ, 2605 MatIncreaseOverlap_MPIBAIJ, 2606 MatGetValues_MPIBAIJ, 2607 MatCopy_MPIBAIJ, 2608 /*44*/ 0, 2609 MatScale_MPIBAIJ, 2610 0, 2611 0, 2612 0, 2613 /*49*/ MatSetBlockSize_MPIBAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*54*/ MatFDColoringCreate_MPIBAIJ, 2619 0, 2620 MatSetUnfactored_MPIBAIJ, 2621 MatPermute_MPIBAIJ, 2622 MatSetValuesBlocked_MPIBAIJ, 2623 /*59*/ MatGetSubMatrix_MPIBAIJ, 2624 MatDestroy_MPIBAIJ, 2625 MatView_MPIBAIJ, 2626 0, 2627 0, 2628 /*64*/ 0, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /*74*/ 0, 2639 MatFDColoringApply_BAIJ, 2640 0, 2641 0, 2642 0, 2643 /*79*/ 0, 2644 0, 2645 0, 2646 0, 2647 MatLoad_MPIBAIJ, 2648 /*84*/ 0, 2649 0, 2650 0, 2651 0, 2652 0, 2653 /*89*/ 0, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /*94*/ 0, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*99*/ 0, 2664 0, 2665 0, 2666 0, 2667 0, 2668 /*104*/0, 2669 MatRealPart_MPIBAIJ, 2670 MatImaginaryPart_MPIBAIJ, 2671 0, 2672 0, 2673 /*109*/0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2679 0, 2680 MatGetGhosts_MPIBAIJ 2681 }; 2682 2683 EXTERN_C_BEGIN 2684 #undef __FUNCT__ 2685 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2686 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2687 { 2688 PetscFunctionBegin; 2689 *a = ((Mat_MPIBAIJ *)A->data)->A; 2690 *iscopy = PETSC_FALSE; 2691 PetscFunctionReturn(0); 2692 } 2693 EXTERN_C_END 2694 2695 EXTERN_C_BEGIN 2696 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2697 EXTERN_C_END 2698 2699 EXTERN_C_BEGIN 2700 #undef __FUNCT__ 2701 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2702 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2703 { 2704 PetscInt m,rstart,cstart,cend; 2705 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2706 const PetscInt *JJ=0; 2707 PetscScalar *values=0; 2708 PetscErrorCode ierr; 2709 2710 PetscFunctionBegin; 2711 2712 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2713 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2714 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2715 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2716 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2717 m = B->rmap->n/bs; 2718 rstart = B->rmap->rstart/bs; 2719 cstart = B->cmap->rstart/bs; 2720 cend = B->cmap->rend/bs; 2721 2722 if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2723 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2724 o_nnz = d_nnz + m; 2725 for (i=0; i<m; i++) { 2726 nz = ii[i+1] - ii[i]; 2727 if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2728 nz_max = PetscMax(nz_max,nz); 2729 JJ = jj + ii[i]; 2730 for (j=0; j<nz; j++) { 2731 if (*JJ >= cstart) break; 2732 JJ++; 2733 } 2734 d = 0; 2735 for (; j<nz; j++) { 2736 if (*JJ++ >= cend) break; 2737 d++; 2738 } 2739 d_nnz[i] = d; 2740 o_nnz[i] = nz - d; 2741 } 2742 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2743 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2744 2745 values = (PetscScalar*)V; 2746 if (!values) { 2747 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2748 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2749 } 2750 for (i=0; i<m; i++) { 2751 PetscInt row = i + rstart; 2752 PetscInt ncols = ii[i+1] - ii[i]; 2753 const PetscInt *icols = jj + ii[i]; 2754 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2755 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2756 } 2757 2758 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2759 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2761 2762 PetscFunctionReturn(0); 2763 } 2764 EXTERN_C_END 2765 2766 #undef __FUNCT__ 2767 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2768 /*@C 2769 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2770 (the default parallel PETSc format). 2771 2772 Collective on MPI_Comm 2773 2774 Input Parameters: 2775 + A - the matrix 2776 . i - the indices into j for the start of each local row (starts with zero) 2777 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2778 - v - optional values in the matrix 2779 2780 Level: developer 2781 2782 .keywords: matrix, aij, compressed row, sparse, parallel 2783 2784 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2785 @*/ 2786 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2787 { 2788 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2789 2790 PetscFunctionBegin; 2791 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2792 if (f) { 2793 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2794 } 2795 PetscFunctionReturn(0); 2796 } 2797 2798 EXTERN_C_BEGIN 2799 #undef __FUNCT__ 2800 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2801 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2802 { 2803 Mat_MPIBAIJ *b; 2804 PetscErrorCode ierr; 2805 PetscInt i, newbs = PetscAbs(bs); 2806 2807 PetscFunctionBegin; 2808 if (bs < 0) { 2809 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2810 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2811 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2812 bs = PetscAbs(bs); 2813 } 2814 if ((d_nnz || o_nnz) && newbs != bs) { 2815 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2816 } 2817 bs = newbs; 2818 2819 2820 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2821 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2822 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2823 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2824 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2825 2826 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2827 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2828 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2829 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2830 2831 if (d_nnz) { 2832 for (i=0; i<B->rmap->n/bs; i++) { 2833 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2834 } 2835 } 2836 if (o_nnz) { 2837 for (i=0; i<B->rmap->n/bs; i++) { 2838 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2839 } 2840 } 2841 2842 b = (Mat_MPIBAIJ*)B->data; 2843 b->bs2 = bs*bs; 2844 b->mbs = B->rmap->n/bs; 2845 b->nbs = B->cmap->n/bs; 2846 b->Mbs = B->rmap->N/bs; 2847 b->Nbs = B->cmap->N/bs; 2848 2849 for (i=0; i<=b->size; i++) { 2850 b->rangebs[i] = B->rmap->range[i]/bs; 2851 } 2852 b->rstartbs = B->rmap->rstart/bs; 2853 b->rendbs = B->rmap->rend/bs; 2854 b->cstartbs = B->cmap->rstart/bs; 2855 b->cendbs = B->cmap->rend/bs; 2856 2857 if (!B->preallocated) { 2858 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2859 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2860 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2862 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2863 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2864 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2865 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2866 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2867 } 2868 2869 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2870 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2871 B->preallocated = PETSC_TRUE; 2872 PetscFunctionReturn(0); 2873 } 2874 EXTERN_C_END 2875 2876 EXTERN_C_BEGIN 2877 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2878 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2879 EXTERN_C_END 2880 2881 2882 EXTERN_C_BEGIN 2883 #undef __FUNCT__ 2884 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2885 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2886 { 2887 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2888 PetscErrorCode ierr; 2889 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2890 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2891 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2892 2893 PetscFunctionBegin; 2894 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2895 ii[0] = 0; 2896 CHKMEMQ; 2897 for (i=0; i<M; i++) { 2898 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2899 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2900 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2901 /* remove one from count of matrix has diagonal */ 2902 for (j=id[i]; j<id[i+1]; j++) { 2903 if (jd[j] == i) {ii[i+1]--;break;} 2904 } 2905 CHKMEMQ; 2906 } 2907 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2908 cnt = 0; 2909 for (i=0; i<M; i++) { 2910 for (j=io[i]; j<io[i+1]; j++) { 2911 if (garray[jo[j]] > rstart) break; 2912 jj[cnt++] = garray[jo[j]]; 2913 CHKMEMQ; 2914 } 2915 for (k=id[i]; k<id[i+1]; k++) { 2916 if (jd[k] != i) { 2917 jj[cnt++] = rstart + jd[k]; 2918 CHKMEMQ; 2919 } 2920 } 2921 for (;j<io[i+1]; j++) { 2922 jj[cnt++] = garray[jo[j]]; 2923 CHKMEMQ; 2924 } 2925 } 2926 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2927 PetscFunctionReturn(0); 2928 } 2929 EXTERN_C_END 2930 2931 /*MC 2932 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2933 2934 Options Database Keys: 2935 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2936 . -mat_block_size <bs> - set the blocksize used to store the matrix 2937 - -mat_use_hash_table <fact> 2938 2939 Level: beginner 2940 2941 .seealso: MatCreateMPIBAIJ 2942 M*/ 2943 2944 EXTERN_C_BEGIN 2945 #undef __FUNCT__ 2946 #define __FUNCT__ "MatCreate_MPIBAIJ" 2947 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2948 { 2949 Mat_MPIBAIJ *b; 2950 PetscErrorCode ierr; 2951 PetscTruth flg; 2952 2953 PetscFunctionBegin; 2954 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2955 B->data = (void*)b; 2956 2957 2958 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2959 B->mapping = 0; 2960 B->assembled = PETSC_FALSE; 2961 2962 B->insertmode = NOT_SET_VALUES; 2963 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2964 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2965 2966 /* build local table of row and column ownerships */ 2967 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2968 2969 /* build cache for off array entries formed */ 2970 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2971 b->donotstash = PETSC_FALSE; 2972 b->colmap = PETSC_NULL; 2973 b->garray = PETSC_NULL; 2974 b->roworiented = PETSC_TRUE; 2975 2976 /* stuff used in block assembly */ 2977 b->barray = 0; 2978 2979 /* stuff used for matrix vector multiply */ 2980 b->lvec = 0; 2981 b->Mvctx = 0; 2982 2983 /* stuff for MatGetRow() */ 2984 b->rowindices = 0; 2985 b->rowvalues = 0; 2986 b->getrowactive = PETSC_FALSE; 2987 2988 /* hash table stuff */ 2989 b->ht = 0; 2990 b->hd = 0; 2991 b->ht_size = 0; 2992 b->ht_flag = PETSC_FALSE; 2993 b->ht_fact = 0; 2994 b->ht_total_ct = 0; 2995 b->ht_insert_ct = 0; 2996 2997 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2998 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2999 if (flg) { 3000 PetscReal fact = 1.39; 3001 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3002 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3003 if (fact <= 1.0) fact = 1.39; 3004 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3005 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3006 } 3007 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3008 3009 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3010 "MatConvert_MPIBAIJ_MPIAdj", 3011 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3013 "MatStoreValues_MPIBAIJ", 3014 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3015 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3016 "MatRetrieveValues_MPIBAIJ", 3017 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3018 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3019 "MatGetDiagonalBlock_MPIBAIJ", 3020 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3021 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3022 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3023 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3024 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3025 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3026 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3027 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3028 "MatDiagonalScaleLocal_MPIBAIJ", 3029 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3030 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3031 "MatSetHashTableFactor_MPIBAIJ", 3032 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3033 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3034 PetscFunctionReturn(0); 3035 } 3036 EXTERN_C_END 3037 3038 /*MC 3039 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3040 3041 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3042 and MATMPIBAIJ otherwise. 3043 3044 Options Database Keys: 3045 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3046 3047 Level: beginner 3048 3049 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3050 M*/ 3051 3052 EXTERN_C_BEGIN 3053 #undef __FUNCT__ 3054 #define __FUNCT__ "MatCreate_BAIJ" 3055 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3056 { 3057 PetscErrorCode ierr; 3058 PetscMPIInt size; 3059 3060 PetscFunctionBegin; 3061 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3062 if (size == 1) { 3063 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3064 } else { 3065 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3066 } 3067 PetscFunctionReturn(0); 3068 } 3069 EXTERN_C_END 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3073 /*@C 3074 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3075 (block compressed row). For good matrix assembly performance 3076 the user should preallocate the matrix storage by setting the parameters 3077 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3078 performance can be increased by more than a factor of 50. 3079 3080 Collective on Mat 3081 3082 Input Parameters: 3083 + A - the matrix 3084 . bs - size of blockk 3085 . d_nz - number of block nonzeros per block row in diagonal portion of local 3086 submatrix (same for all local rows) 3087 . d_nnz - array containing the number of block nonzeros in the various block rows 3088 of the in diagonal portion of the local (possibly different for each block 3089 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3090 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3091 submatrix (same for all local rows). 3092 - o_nnz - array containing the number of nonzeros in the various block rows of the 3093 off-diagonal portion of the local submatrix (possibly different for 3094 each block row) or PETSC_NULL. 3095 3096 If the *_nnz parameter is given then the *_nz parameter is ignored 3097 3098 Options Database Keys: 3099 + -mat_block_size - size of the blocks to use 3100 - -mat_use_hash_table <fact> 3101 3102 Notes: 3103 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3104 than it must be used on all processors that share the object for that argument. 3105 3106 Storage Information: 3107 For a square global matrix we define each processor's diagonal portion 3108 to be its local rows and the corresponding columns (a square submatrix); 3109 each processor's off-diagonal portion encompasses the remainder of the 3110 local matrix (a rectangular submatrix). 3111 3112 The user can specify preallocated storage for the diagonal part of 3113 the local submatrix with either d_nz or d_nnz (not both). Set 3114 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3115 memory allocation. Likewise, specify preallocated storage for the 3116 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3117 3118 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3119 the figure below we depict these three local rows and all columns (0-11). 3120 3121 .vb 3122 0 1 2 3 4 5 6 7 8 9 10 11 3123 ------------------- 3124 row 3 | o o o d d d o o o o o o 3125 row 4 | o o o d d d o o o o o o 3126 row 5 | o o o d d d o o o o o o 3127 ------------------- 3128 .ve 3129 3130 Thus, any entries in the d locations are stored in the d (diagonal) 3131 submatrix, and any entries in the o locations are stored in the 3132 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3133 stored simply in the MATSEQBAIJ format for compressed row storage. 3134 3135 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3136 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3137 In general, for PDE problems in which most nonzeros are near the diagonal, 3138 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3139 or you will get TERRIBLE performance; see the users' manual chapter on 3140 matrices. 3141 3142 You can call MatGetInfo() to get information on how effective the preallocation was; 3143 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3144 You can also run with the option -info and look for messages with the string 3145 malloc in them to see if additional memory allocation was needed. 3146 3147 Level: intermediate 3148 3149 .keywords: matrix, block, aij, compressed row, sparse, parallel 3150 3151 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3152 @*/ 3153 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3154 { 3155 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3156 3157 PetscFunctionBegin; 3158 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3159 if (f) { 3160 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3161 } 3162 PetscFunctionReturn(0); 3163 } 3164 3165 #undef __FUNCT__ 3166 #define __FUNCT__ "MatCreateMPIBAIJ" 3167 /*@C 3168 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3169 (block compressed row). For good matrix assembly performance 3170 the user should preallocate the matrix storage by setting the parameters 3171 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3172 performance can be increased by more than a factor of 50. 3173 3174 Collective on MPI_Comm 3175 3176 Input Parameters: 3177 + comm - MPI communicator 3178 . bs - size of blockk 3179 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3180 This value should be the same as the local size used in creating the 3181 y vector for the matrix-vector product y = Ax. 3182 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3183 This value should be the same as the local size used in creating the 3184 x vector for the matrix-vector product y = Ax. 3185 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3186 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3187 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3188 submatrix (same for all local rows) 3189 . d_nnz - array containing the number of nonzero blocks in the various block rows 3190 of the in diagonal portion of the local (possibly different for each block 3191 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3192 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3193 submatrix (same for all local rows). 3194 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3195 off-diagonal portion of the local submatrix (possibly different for 3196 each block row) or PETSC_NULL. 3197 3198 Output Parameter: 3199 . A - the matrix 3200 3201 Options Database Keys: 3202 + -mat_block_size - size of the blocks to use 3203 - -mat_use_hash_table <fact> 3204 3205 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3206 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3207 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3208 3209 Notes: 3210 If the *_nnz parameter is given then the *_nz parameter is ignored 3211 3212 A nonzero block is any block that as 1 or more nonzeros in it 3213 3214 The user MUST specify either the local or global matrix dimensions 3215 (possibly both). 3216 3217 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3218 than it must be used on all processors that share the object for that argument. 3219 3220 Storage Information: 3221 For a square global matrix we define each processor's diagonal portion 3222 to be its local rows and the corresponding columns (a square submatrix); 3223 each processor's off-diagonal portion encompasses the remainder of the 3224 local matrix (a rectangular submatrix). 3225 3226 The user can specify preallocated storage for the diagonal part of 3227 the local submatrix with either d_nz or d_nnz (not both). Set 3228 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3229 memory allocation. Likewise, specify preallocated storage for the 3230 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3231 3232 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3233 the figure below we depict these three local rows and all columns (0-11). 3234 3235 .vb 3236 0 1 2 3 4 5 6 7 8 9 10 11 3237 ------------------- 3238 row 3 | o o o d d d o o o o o o 3239 row 4 | o o o d d d o o o o o o 3240 row 5 | o o o d d d o o o o o o 3241 ------------------- 3242 .ve 3243 3244 Thus, any entries in the d locations are stored in the d (diagonal) 3245 submatrix, and any entries in the o locations are stored in the 3246 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3247 stored simply in the MATSEQBAIJ format for compressed row storage. 3248 3249 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3250 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3251 In general, for PDE problems in which most nonzeros are near the diagonal, 3252 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3253 or you will get TERRIBLE performance; see the users' manual chapter on 3254 matrices. 3255 3256 Level: intermediate 3257 3258 .keywords: matrix, block, aij, compressed row, sparse, parallel 3259 3260 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3261 @*/ 3262 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3263 { 3264 PetscErrorCode ierr; 3265 PetscMPIInt size; 3266 3267 PetscFunctionBegin; 3268 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3269 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3270 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3271 if (size > 1) { 3272 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3273 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3274 } else { 3275 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3276 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3277 } 3278 PetscFunctionReturn(0); 3279 } 3280 3281 #undef __FUNCT__ 3282 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3283 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3284 { 3285 Mat mat; 3286 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3287 PetscErrorCode ierr; 3288 PetscInt len=0; 3289 3290 PetscFunctionBegin; 3291 *newmat = 0; 3292 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3293 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3294 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3295 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3296 3297 mat->factor = matin->factor; 3298 mat->preallocated = PETSC_TRUE; 3299 mat->assembled = PETSC_TRUE; 3300 mat->insertmode = NOT_SET_VALUES; 3301 3302 a = (Mat_MPIBAIJ*)mat->data; 3303 mat->rmap->bs = matin->rmap->bs; 3304 a->bs2 = oldmat->bs2; 3305 a->mbs = oldmat->mbs; 3306 a->nbs = oldmat->nbs; 3307 a->Mbs = oldmat->Mbs; 3308 a->Nbs = oldmat->Nbs; 3309 3310 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3311 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3312 3313 a->size = oldmat->size; 3314 a->rank = oldmat->rank; 3315 a->donotstash = oldmat->donotstash; 3316 a->roworiented = oldmat->roworiented; 3317 a->rowindices = 0; 3318 a->rowvalues = 0; 3319 a->getrowactive = PETSC_FALSE; 3320 a->barray = 0; 3321 a->rstartbs = oldmat->rstartbs; 3322 a->rendbs = oldmat->rendbs; 3323 a->cstartbs = oldmat->cstartbs; 3324 a->cendbs = oldmat->cendbs; 3325 3326 /* hash table stuff */ 3327 a->ht = 0; 3328 a->hd = 0; 3329 a->ht_size = 0; 3330 a->ht_flag = oldmat->ht_flag; 3331 a->ht_fact = oldmat->ht_fact; 3332 a->ht_total_ct = 0; 3333 a->ht_insert_ct = 0; 3334 3335 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3336 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 3337 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3338 if (oldmat->colmap) { 3339 #if defined (PETSC_USE_CTABLE) 3340 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3341 #else 3342 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3343 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3344 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3345 #endif 3346 } else a->colmap = 0; 3347 3348 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3349 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3350 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3351 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3352 } else a->garray = 0; 3353 3354 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3355 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3356 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3357 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3358 3359 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3360 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3361 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3362 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3363 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3364 *newmat = mat; 3365 3366 PetscFunctionReturn(0); 3367 } 3368 3369 #include "petscsys.h" 3370 3371 #undef __FUNCT__ 3372 #define __FUNCT__ "MatLoad_MPIBAIJ" 3373 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 3374 { 3375 Mat A; 3376 PetscErrorCode ierr; 3377 int fd; 3378 PetscInt i,nz,j,rstart,rend; 3379 PetscScalar *vals,*buf; 3380 MPI_Comm comm = ((PetscObject)viewer)->comm; 3381 MPI_Status status; 3382 PetscMPIInt rank,size,maxnz; 3383 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3384 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3385 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3386 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3387 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3388 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3389 3390 PetscFunctionBegin; 3391 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3392 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3393 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3394 3395 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3396 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3397 if (!rank) { 3398 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3399 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3400 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3401 } 3402 3403 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3404 M = header[1]; N = header[2]; 3405 3406 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3407 3408 /* 3409 This code adds extra rows to make sure the number of rows is 3410 divisible by the blocksize 3411 */ 3412 Mbs = M/bs; 3413 extra_rows = bs - M + bs*Mbs; 3414 if (extra_rows == bs) extra_rows = 0; 3415 else Mbs++; 3416 if (extra_rows && !rank) { 3417 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3418 } 3419 3420 /* determine ownership of all rows */ 3421 mbs = Mbs/size + ((Mbs % size) > rank); 3422 m = mbs*bs; 3423 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3424 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3425 3426 /* process 0 needs enough room for process with most rows */ 3427 if (!rank) { 3428 mmax = rowners[1]; 3429 for (i=2; i<size; i++) { 3430 mmax = PetscMax(mmax,rowners[i]); 3431 } 3432 mmax*=bs; 3433 } else mmax = m; 3434 3435 rowners[0] = 0; 3436 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3437 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3438 rstart = rowners[rank]; 3439 rend = rowners[rank+1]; 3440 3441 /* distribute row lengths to all processors */ 3442 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3443 if (!rank) { 3444 mend = m; 3445 if (size == 1) mend = mend - extra_rows; 3446 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3447 for (j=mend; j<m; j++) locrowlens[j] = 1; 3448 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3449 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3450 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3451 for (j=0; j<m; j++) { 3452 procsnz[0] += locrowlens[j]; 3453 } 3454 for (i=1; i<size; i++) { 3455 mend = browners[i+1] - browners[i]; 3456 if (i == size-1) mend = mend - extra_rows; 3457 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3458 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3459 /* calculate the number of nonzeros on each processor */ 3460 for (j=0; j<browners[i+1]-browners[i]; j++) { 3461 procsnz[i] += rowlengths[j]; 3462 } 3463 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3464 } 3465 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3466 } else { 3467 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3468 } 3469 3470 if (!rank) { 3471 /* determine max buffer needed and allocate it */ 3472 maxnz = procsnz[0]; 3473 for (i=1; i<size; i++) { 3474 maxnz = PetscMax(maxnz,procsnz[i]); 3475 } 3476 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3477 3478 /* read in my part of the matrix column indices */ 3479 nz = procsnz[0]; 3480 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3481 mycols = ibuf; 3482 if (size == 1) nz -= extra_rows; 3483 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3484 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3485 3486 /* read in every ones (except the last) and ship off */ 3487 for (i=1; i<size-1; i++) { 3488 nz = procsnz[i]; 3489 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3490 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3491 } 3492 /* read in the stuff for the last proc */ 3493 if (size != 1) { 3494 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3495 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3496 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3497 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3498 } 3499 ierr = PetscFree(cols);CHKERRQ(ierr); 3500 } else { 3501 /* determine buffer space needed for message */ 3502 nz = 0; 3503 for (i=0; i<m; i++) { 3504 nz += locrowlens[i]; 3505 } 3506 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3507 mycols = ibuf; 3508 /* receive message of column indices*/ 3509 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3510 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3511 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3512 } 3513 3514 /* loop over local rows, determining number of off diagonal entries */ 3515 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3516 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3517 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3518 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3519 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3520 rowcount = 0; nzcount = 0; 3521 for (i=0; i<mbs; i++) { 3522 dcount = 0; 3523 odcount = 0; 3524 for (j=0; j<bs; j++) { 3525 kmax = locrowlens[rowcount]; 3526 for (k=0; k<kmax; k++) { 3527 tmp = mycols[nzcount++]/bs; 3528 if (!mask[tmp]) { 3529 mask[tmp] = 1; 3530 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3531 else masked1[dcount++] = tmp; 3532 } 3533 } 3534 rowcount++; 3535 } 3536 3537 dlens[i] = dcount; 3538 odlens[i] = odcount; 3539 3540 /* zero out the mask elements we set */ 3541 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3542 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3543 } 3544 3545 /* create our matrix */ 3546 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3547 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3548 ierr = MatSetType(A,type);CHKERRQ(ierr) 3549 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3550 3551 if (!rank) { 3552 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3553 /* read in my part of the matrix numerical values */ 3554 nz = procsnz[0]; 3555 vals = buf; 3556 mycols = ibuf; 3557 if (size == 1) nz -= extra_rows; 3558 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3559 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3560 3561 /* insert into matrix */ 3562 jj = rstart*bs; 3563 for (i=0; i<m; i++) { 3564 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3565 mycols += locrowlens[i]; 3566 vals += locrowlens[i]; 3567 jj++; 3568 } 3569 /* read in other processors (except the last one) and ship out */ 3570 for (i=1; i<size-1; i++) { 3571 nz = procsnz[i]; 3572 vals = buf; 3573 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3574 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3575 } 3576 /* the last proc */ 3577 if (size != 1){ 3578 nz = procsnz[i] - extra_rows; 3579 vals = buf; 3580 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3581 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3582 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3583 } 3584 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3585 } else { 3586 /* receive numeric values */ 3587 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3588 3589 /* receive message of values*/ 3590 vals = buf; 3591 mycols = ibuf; 3592 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3593 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3594 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3595 3596 /* insert into matrix */ 3597 jj = rstart*bs; 3598 for (i=0; i<m; i++) { 3599 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3600 mycols += locrowlens[i]; 3601 vals += locrowlens[i]; 3602 jj++; 3603 } 3604 } 3605 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3606 ierr = PetscFree(buf);CHKERRQ(ierr); 3607 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3608 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3609 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3610 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3611 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3612 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3613 3614 *newmat = A; 3615 PetscFunctionReturn(0); 3616 } 3617 3618 #undef __FUNCT__ 3619 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3620 /*@ 3621 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3622 3623 Input Parameters: 3624 . mat - the matrix 3625 . fact - factor 3626 3627 Collective on Mat 3628 3629 Level: advanced 3630 3631 Notes: 3632 This can also be set by the command line option: -mat_use_hash_table <fact> 3633 3634 .keywords: matrix, hashtable, factor, HT 3635 3636 .seealso: MatSetOption() 3637 @*/ 3638 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3639 { 3640 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3641 3642 PetscFunctionBegin; 3643 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3644 if (f) { 3645 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3646 } 3647 PetscFunctionReturn(0); 3648 } 3649 3650 EXTERN_C_BEGIN 3651 #undef __FUNCT__ 3652 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3653 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3654 { 3655 Mat_MPIBAIJ *baij; 3656 3657 PetscFunctionBegin; 3658 baij = (Mat_MPIBAIJ*)mat->data; 3659 baij->ht_fact = fact; 3660 PetscFunctionReturn(0); 3661 } 3662 EXTERN_C_END 3663 3664 #undef __FUNCT__ 3665 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3666 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3667 { 3668 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3669 PetscFunctionBegin; 3670 *Ad = a->A; 3671 *Ao = a->B; 3672 *colmap = a->garray; 3673 PetscFunctionReturn(0); 3674 } 3675 3676 /* 3677 Special version for direct calls from Fortran (to eliminate two function call overheads 3678 */ 3679 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3680 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3681 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3682 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3683 #endif 3684 3685 #undef __FUNCT__ 3686 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3687 /*@C 3688 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3689 3690 Collective on Mat 3691 3692 Input Parameters: 3693 + mat - the matrix 3694 . min - number of input rows 3695 . im - input rows 3696 . nin - number of input columns 3697 . in - input columns 3698 . v - numerical values input 3699 - addvin - INSERT_VALUES or ADD_VALUES 3700 3701 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3702 3703 Level: advanced 3704 3705 .seealso: MatSetValuesBlocked() 3706 @*/ 3707 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3708 { 3709 /* convert input arguments to C version */ 3710 Mat mat = *matin; 3711 PetscInt m = *min, n = *nin; 3712 InsertMode addv = *addvin; 3713 3714 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3715 const MatScalar *value; 3716 MatScalar *barray=baij->barray; 3717 PetscTruth roworiented = baij->roworiented; 3718 PetscErrorCode ierr; 3719 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3720 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3721 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3722 3723 PetscFunctionBegin; 3724 /* tasks normally handled by MatSetValuesBlocked() */ 3725 if (mat->insertmode == NOT_SET_VALUES) { 3726 mat->insertmode = addv; 3727 } 3728 #if defined(PETSC_USE_DEBUG) 3729 else if (mat->insertmode != addv) { 3730 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3731 } 3732 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3733 #endif 3734 if (mat->assembled) { 3735 mat->was_assembled = PETSC_TRUE; 3736 mat->assembled = PETSC_FALSE; 3737 } 3738 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3739 3740 3741 if(!barray) { 3742 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3743 baij->barray = barray; 3744 } 3745 3746 if (roworiented) { 3747 stepval = (n-1)*bs; 3748 } else { 3749 stepval = (m-1)*bs; 3750 } 3751 for (i=0; i<m; i++) { 3752 if (im[i] < 0) continue; 3753 #if defined(PETSC_USE_DEBUG) 3754 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3755 #endif 3756 if (im[i] >= rstart && im[i] < rend) { 3757 row = im[i] - rstart; 3758 for (j=0; j<n; j++) { 3759 /* If NumCol = 1 then a copy is not required */ 3760 if ((roworiented) && (n == 1)) { 3761 barray = (MatScalar*)v + i*bs2; 3762 } else if((!roworiented) && (m == 1)) { 3763 barray = (MatScalar*)v + j*bs2; 3764 } else { /* Here a copy is required */ 3765 if (roworiented) { 3766 value = v + i*(stepval+bs)*bs + j*bs; 3767 } else { 3768 value = v + j*(stepval+bs)*bs + i*bs; 3769 } 3770 for (ii=0; ii<bs; ii++,value+=stepval) { 3771 for (jj=0; jj<bs; jj++) { 3772 *barray++ = *value++; 3773 } 3774 } 3775 barray -=bs2; 3776 } 3777 3778 if (in[j] >= cstart && in[j] < cend){ 3779 col = in[j] - cstart; 3780 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3781 } 3782 else if (in[j] < 0) continue; 3783 #if defined(PETSC_USE_DEBUG) 3784 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 3785 #endif 3786 else { 3787 if (mat->was_assembled) { 3788 if (!baij->colmap) { 3789 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3790 } 3791 3792 #if defined(PETSC_USE_DEBUG) 3793 #if defined (PETSC_USE_CTABLE) 3794 { PetscInt data; 3795 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3796 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3797 } 3798 #else 3799 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3800 #endif 3801 #endif 3802 #if defined (PETSC_USE_CTABLE) 3803 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3804 col = (col - 1)/bs; 3805 #else 3806 col = (baij->colmap[in[j]] - 1)/bs; 3807 #endif 3808 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3809 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3810 col = in[j]; 3811 } 3812 } 3813 else col = in[j]; 3814 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3815 } 3816 } 3817 } else { 3818 if (!baij->donotstash) { 3819 if (roworiented) { 3820 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3821 } else { 3822 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3823 } 3824 } 3825 } 3826 } 3827 3828 /* task normally handled by MatSetValuesBlocked() */ 3829 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3830 PetscFunctionReturn(0); 3831 } 3832