1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define CHUNKSIZE 10 111 112 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 113 { \ 114 \ 115 brow = row/bs; \ 116 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 117 rmax = aimax[brow]; nrow = ailen[brow]; \ 118 bcol = col/bs; \ 119 ridx = row % bs; cidx = col % bs; \ 120 low = 0; high = nrow; \ 121 while (high-low > 3) { \ 122 t = (low+high)/2; \ 123 if (rp[t] > bcol) high = t; \ 124 else low = t; \ 125 } \ 126 for (_i=low; _i<high; _i++) { \ 127 if (rp[_i] > bcol) break; \ 128 if (rp[_i] == bcol) { \ 129 bap = ap + bs2*_i + bs*cidx + ridx; \ 130 if (addv == ADD_VALUES) *bap += value; \ 131 else *bap = value; \ 132 goto a_noinsert; \ 133 } \ 134 } \ 135 if (a->nonew == 1) goto a_noinsert; \ 136 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 137 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 138 N = nrow++ - 1; \ 139 /* shift up all the later entries in this row */ \ 140 for (ii=N; ii>=_i; ii--) { \ 141 rp[ii+1] = rp[ii]; \ 142 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 143 } \ 144 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 145 rp[_i] = bcol; \ 146 ap[bs2*_i + bs*cidx + ridx] = value; \ 147 a_noinsert:; \ 148 ailen[brow] = nrow; \ 149 } 150 151 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 152 { \ 153 brow = row/bs; \ 154 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 155 rmax = bimax[brow]; nrow = bilen[brow]; \ 156 bcol = col/bs; \ 157 ridx = row % bs; cidx = col % bs; \ 158 low = 0; high = nrow; \ 159 while (high-low > 3) { \ 160 t = (low+high)/2; \ 161 if (rp[t] > bcol) high = t; \ 162 else low = t; \ 163 } \ 164 for (_i=low; _i<high; _i++) { \ 165 if (rp[_i] > bcol) break; \ 166 if (rp[_i] == bcol) { \ 167 bap = ap + bs2*_i + bs*cidx + ridx; \ 168 if (addv == ADD_VALUES) *bap += value; \ 169 else *bap = value; \ 170 goto b_noinsert; \ 171 } \ 172 } \ 173 if (b->nonew == 1) goto b_noinsert; \ 174 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 175 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 176 CHKMEMQ;\ 177 N = nrow++ - 1; \ 178 /* shift up all the later entries in this row */ \ 179 for (ii=N; ii>=_i; ii--) { \ 180 rp[ii+1] = rp[ii]; \ 181 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 182 } \ 183 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 184 rp[_i] = bcol; \ 185 ap[bs2*_i + bs*cidx + ridx] = value; \ 186 b_noinsert:; \ 187 bilen[brow] = nrow; \ 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatSetValues_MPIBAIJ" 192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 193 { 194 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 195 MatScalar value; 196 PetscTruth roworiented = baij->roworiented; 197 PetscErrorCode ierr; 198 PetscInt i,j,row,col; 199 PetscInt rstart_orig=mat->rmap->rstart; 200 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 201 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 202 203 /* Some Variables required in the macro */ 204 Mat A = baij->A; 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 206 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 207 MatScalar *aa=a->a; 208 209 Mat B = baij->B; 210 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 211 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 212 MatScalar *ba=b->a; 213 214 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 215 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 216 MatScalar *ap,*bap; 217 218 PetscFunctionBegin; 219 if (v) PetscValidScalarPointer(v,6); 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_DEBUG) 223 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 224 #endif 225 if (im[i] >= rstart_orig && im[i] < rend_orig) { 226 row = im[i] - rstart_orig; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart_orig && in[j] < cend_orig){ 229 col = in[j] - cstart_orig; 230 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 231 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_DEBUG) 235 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!baij->colmap) { 240 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 244 col = col - 1; 245 #else 246 col = baij->colmap[in[j]/bs] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 249 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 252 B = baij->B; 253 b = (Mat_SeqBAIJ*)(B)->data; 254 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 255 ba=b->a; 256 } else col += in[j]%bs; 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!baij->donotstash) { 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray=baij->barray; 283 PetscTruth roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if(!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) { 296 stepval = (n-1)*bs; 297 } else { 298 stepval = (m-1)*bs; 299 } 300 for (i=0; i<m; i++) { 301 if (im[i] < 0) continue; 302 #if defined(PETSC_USE_DEBUG) 303 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 304 #endif 305 if (im[i] >= rstart && im[i] < rend) { 306 row = im[i] - rstart; 307 for (j=0; j<n; j++) { 308 /* If NumCol = 1 then a copy is not required */ 309 if ((roworiented) && (n == 1)) { 310 barray = (MatScalar*)v + i*bs2; 311 } else if((!roworiented) && (m == 1)) { 312 barray = (MatScalar*)v + j*bs2; 313 } else { /* Here a copy is required */ 314 if (roworiented) { 315 value = v + i*(stepval+bs)*bs + j*bs; 316 } else { 317 value = v + j*(stepval+bs)*bs + i*bs; 318 } 319 for (ii=0; ii<bs; ii++,value+=stepval) { 320 for (jj=0; jj<bs; jj++) { 321 *barray++ = *value++; 322 } 323 } 324 barray -=bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (!baij->donotstash) { 368 if (roworiented) { 369 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } else { 371 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 372 } 373 } 374 } 375 } 376 PetscFunctionReturn(0); 377 } 378 379 #define HASH_KEY 0.6180339887 380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 386 { 387 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 388 PetscTruth roworiented = baij->roworiented; 389 PetscErrorCode ierr; 390 PetscInt i,j,row,col; 391 PetscInt rstart_orig=mat->rmap->rstart; 392 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 393 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 394 PetscReal tmp; 395 MatScalar **HD = baij->hd,value; 396 #if defined(PETSC_USE_DEBUG) 397 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 398 #endif 399 400 PetscFunctionBegin; 401 if (v) PetscValidScalarPointer(v,6); 402 for (i=0; i<m; i++) { 403 #if defined(PETSC_USE_DEBUG) 404 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 405 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 406 #endif 407 row = im[i]; 408 if (row >= rstart_orig && row < rend_orig) { 409 for (j=0; j<n; j++) { 410 col = in[j]; 411 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412 /* Look up PetscInto the Hash Table */ 413 key = (row/bs)*Nbs+(col/bs)+1; 414 h1 = HASH(size,key,tmp); 415 416 417 idx = h1; 418 #if defined(PETSC_USE_DEBUG) 419 insert_ct++; 420 total_ct++; 421 if (HT[idx] != key) { 422 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == size) { 424 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 425 if (idx == h1) { 426 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 427 } 428 } 429 } 430 #else 431 if (HT[idx] != key) { 432 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 433 if (idx == size) { 434 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 435 if (idx == h1) { 436 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 437 } 438 } 439 } 440 #endif 441 /* A HASH table entry is found, so insert the values at the correct address */ 442 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 443 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 444 } 445 } else { 446 if (!baij->donotstash) { 447 if (roworiented) { 448 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 449 } else { 450 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 451 } 452 } 453 } 454 } 455 #if defined(PETSC_USE_DEBUG) 456 baij->ht_total_ct = total_ct; 457 baij->ht_insert_ct = insert_ct; 458 #endif 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 PetscTruth roworiented = baij->roworiented; 468 PetscErrorCode ierr; 469 PetscInt i,j,ii,jj,row,col; 470 PetscInt rstart=baij->rstartbs; 471 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 472 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 473 PetscReal tmp; 474 MatScalar **HD = baij->hd,*baij_a; 475 const PetscScalar *v_t,*value; 476 #if defined(PETSC_USE_DEBUG) 477 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 478 #endif 479 480 PetscFunctionBegin; 481 482 if (roworiented) { 483 stepval = (n-1)*bs; 484 } else { 485 stepval = (m-1)*bs; 486 } 487 for (i=0; i<m; i++) { 488 #if defined(PETSC_USE_DEBUG) 489 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 490 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 491 #endif 492 row = im[i]; 493 v_t = v + i*nbs2; 494 if (row >= rstart && row < rend) { 495 for (j=0; j<n; j++) { 496 col = in[j]; 497 498 /* Look up into the Hash Table */ 499 key = row*Nbs+col+1; 500 h1 = HASH(size,key,tmp); 501 502 idx = h1; 503 #if defined(PETSC_USE_DEBUG) 504 total_ct++; 505 insert_ct++; 506 if (HT[idx] != key) { 507 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == size) { 509 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 510 if (idx == h1) { 511 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 512 } 513 } 514 } 515 #else 516 if (HT[idx] != key) { 517 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 518 if (idx == size) { 519 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 520 if (idx == h1) { 521 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 522 } 523 } 524 } 525 #endif 526 baij_a = HD[idx]; 527 if (roworiented) { 528 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 529 /* value = v + (i*(stepval+bs)+j)*bs; */ 530 value = v_t; 531 v_t += bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval) { 534 for (jj=ii; jj<bs2; jj+=bs) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval) { 540 for (jj=ii; jj<bs2; jj+=bs) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } else { 546 value = v + j*(stepval+bs)*bs + i*bs; 547 if (addv == ADD_VALUES) { 548 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 549 for (jj=0; jj<bs; jj++) { 550 baij_a[jj] += *value++; 551 } 552 } 553 } else { 554 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 555 for (jj=0; jj<bs; jj++) { 556 baij_a[jj] = *value++; 557 } 558 } 559 } 560 } 561 } 562 } else { 563 if (!baij->donotstash) { 564 if (roworiented) { 565 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } else { 567 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 568 } 569 } 570 } 571 } 572 #if defined(PETSC_USE_DEBUG) 573 baij->ht_total_ct = total_ct; 574 baij->ht_insert_ct = insert_ct; 575 #endif 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatGetValues_MPIBAIJ" 581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 582 { 583 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 584 PetscErrorCode ierr; 585 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 586 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 587 588 PetscFunctionBegin; 589 for (i=0; i<m; i++) { 590 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 591 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 592 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 593 row = idxm[i] - bsrstart; 594 for (j=0; j<n; j++) { 595 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 596 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 597 if (idxn[j] >= bscstart && idxn[j] < bscend){ 598 col = idxn[j] - bscstart; 599 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 600 } else { 601 if (!baij->colmap) { 602 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 603 } 604 #if defined (PETSC_USE_CTABLE) 605 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 606 data --; 607 #else 608 data = baij->colmap[idxn[j]/bs]-1; 609 #endif 610 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 611 else { 612 col = data + idxn[j]%bs; 613 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 614 } 615 } 616 } 617 } else { 618 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatNorm_MPIBAIJ" 626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 627 { 628 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 629 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 630 PetscErrorCode ierr; 631 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 632 PetscReal sum = 0.0; 633 MatScalar *v; 634 635 PetscFunctionBegin; 636 if (baij->size == 1) { 637 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 638 } else { 639 if (type == NORM_FROBENIUS) { 640 v = amat->a; 641 nz = amat->nz*bs2; 642 for (i=0; i<nz; i++) { 643 #if defined(PETSC_USE_COMPLEX) 644 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 645 #else 646 sum += (*v)*(*v); v++; 647 #endif 648 } 649 v = bmat->a; 650 nz = bmat->nz*bs2; 651 for (i=0; i<nz; i++) { 652 #if defined(PETSC_USE_COMPLEX) 653 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 654 #else 655 sum += (*v)*(*v); v++; 656 #endif 657 } 658 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 *nrm = sqrt(*nrm); 660 } else if (type == NORM_1) { /* max column sum */ 661 PetscReal *tmp,*tmp2; 662 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 663 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 664 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 665 v = amat->a; jj = amat->j; 666 for (i=0; i<amat->nz; i++) { 667 for (j=0; j<bs; j++){ 668 col = bs*(cstart + *jj) + j; /* column index */ 669 for (row=0; row<bs; row++){ 670 tmp[col] += PetscAbsScalar(*v); v++; 671 } 672 } 673 jj++; 674 } 675 v = bmat->a; jj = bmat->j; 676 for (i=0; i<bmat->nz; i++) { 677 for (j=0; j<bs; j++){ 678 col = bs*garray[*jj] + j; 679 for (row=0; row<bs; row++){ 680 tmp[col] += PetscAbsScalar(*v); v++; 681 } 682 } 683 jj++; 684 } 685 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 686 *nrm = 0.0; 687 for (j=0; j<mat->cmap->N; j++) { 688 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 689 } 690 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 691 } else if (type == NORM_INFINITY) { /* max row sum */ 692 PetscReal *sums; 693 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 694 sum = 0.0; 695 for (j=0; j<amat->mbs; j++) { 696 for (row=0; row<bs; row++) sums[row] = 0.0; 697 v = amat->a + bs2*amat->i[j]; 698 nz = amat->i[j+1]-amat->i[j]; 699 for (i=0; i<nz; i++) { 700 for (col=0; col<bs; col++){ 701 for (row=0; row<bs; row++){ 702 sums[row] += PetscAbsScalar(*v); v++; 703 } 704 } 705 } 706 v = bmat->a + bs2*bmat->i[j]; 707 nz = bmat->i[j+1]-bmat->i[j]; 708 for (i=0; i<nz; i++) { 709 for (col=0; col<bs; col++){ 710 for (row=0; row<bs; row++){ 711 sums[row] += PetscAbsScalar(*v); v++; 712 } 713 } 714 } 715 for (row=0; row<bs; row++){ 716 if (sums[row] > sum) sum = sums[row]; 717 } 718 } 719 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 720 ierr = PetscFree(sums);CHKERRQ(ierr); 721 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 722 } 723 PetscFunctionReturn(0); 724 } 725 726 /* 727 Creates the hash table, and sets the table 728 This table is created only once. 729 If new entried need to be added to the matrix 730 then the hash table has to be destroyed and 731 recreated. 732 */ 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 735 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 736 { 737 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 738 Mat A = baij->A,B=baij->B; 739 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 740 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 741 PetscErrorCode ierr; 742 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 743 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 744 PetscInt *HT,key; 745 MatScalar **HD; 746 PetscReal tmp; 747 #if defined(PETSC_USE_INFO) 748 PetscInt ct=0,max=0; 749 #endif 750 751 PetscFunctionBegin; 752 if (baij->ht) PetscFunctionReturn(0); 753 754 baij->ht_size = (PetscInt)(factor*nz); 755 ht_size = baij->ht_size; 756 757 /* Allocate Memory for Hash Table */ 758 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 759 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 760 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 761 HD = baij->hd; 762 HT = baij->ht; 763 764 /* Loop Over A */ 765 for (i=0; i<a->mbs; i++) { 766 for (j=ai[i]; j<ai[i+1]; j++) { 767 row = i+rstart; 768 col = aj[j]+cstart; 769 770 key = row*Nbs + col + 1; 771 h1 = HASH(ht_size,key,tmp); 772 for (k=0; k<ht_size; k++){ 773 if (!HT[(h1+k)%ht_size]) { 774 HT[(h1+k)%ht_size] = key; 775 HD[(h1+k)%ht_size] = a->a + j*bs2; 776 break; 777 #if defined(PETSC_USE_INFO) 778 } else { 779 ct++; 780 #endif 781 } 782 } 783 #if defined(PETSC_USE_INFO) 784 if (k> max) max = k; 785 #endif 786 } 787 } 788 /* Loop Over B */ 789 for (i=0; i<b->mbs; i++) { 790 for (j=bi[i]; j<bi[i+1]; j++) { 791 row = i+rstart; 792 col = garray[bj[j]]; 793 key = row*Nbs + col + 1; 794 h1 = HASH(ht_size,key,tmp); 795 for (k=0; k<ht_size; k++){ 796 if (!HT[(h1+k)%ht_size]) { 797 HT[(h1+k)%ht_size] = key; 798 HD[(h1+k)%ht_size] = b->a + j*bs2; 799 break; 800 #if defined(PETSC_USE_INFO) 801 } else { 802 ct++; 803 #endif 804 } 805 } 806 #if defined(PETSC_USE_INFO) 807 if (k> max) max = k; 808 #endif 809 } 810 } 811 812 /* Print Summary */ 813 #if defined(PETSC_USE_INFO) 814 for (i=0,j=0; i<ht_size; i++) { 815 if (HT[i]) {j++;} 816 } 817 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 818 #endif 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 824 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 825 { 826 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 827 PetscErrorCode ierr; 828 PetscInt nstash,reallocs; 829 InsertMode addv; 830 831 PetscFunctionBegin; 832 if (baij->donotstash) { 833 PetscFunctionReturn(0); 834 } 835 836 /* make sure all processors are either in INSERTMODE or ADDMODE */ 837 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 838 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 839 mat->insertmode = addv; /* in case this processor had no cache */ 840 841 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 842 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 846 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 852 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 853 { 854 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 855 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 856 PetscErrorCode ierr; 857 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 858 PetscInt *row,*col; 859 PetscTruth r1,r2,r3,other_disassembled; 860 MatScalar *val; 861 InsertMode addv = mat->insertmode; 862 PetscMPIInt n; 863 864 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 865 PetscFunctionBegin; 866 if (!baij->donotstash) { 867 while (1) { 868 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 869 if (!flg) break; 870 871 for (i=0; i<n;) { 872 /* Now identify the consecutive vals belonging to the same row */ 873 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 874 if (j < n) ncols = j-i; 875 else ncols = n-i; 876 /* Now assemble all these values with a single function call */ 877 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 878 i = j; 879 } 880 } 881 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 882 /* Now process the block-stash. Since the values are stashed column-oriented, 883 set the roworiented flag to column oriented, and after MatSetValues() 884 restore the original flags */ 885 r1 = baij->roworiented; 886 r2 = a->roworiented; 887 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 888 baij->roworiented = PETSC_FALSE; 889 a->roworiented = PETSC_FALSE; 890 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 891 while (1) { 892 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 893 if (!flg) break; 894 895 for (i=0; i<n;) { 896 /* Now identify the consecutive vals belonging to the same row */ 897 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 898 if (j < n) ncols = j-i; 899 else ncols = n-i; 900 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 901 i = j; 902 } 903 } 904 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 905 baij->roworiented = r1; 906 a->roworiented = r2; 907 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 908 } 909 910 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 911 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 912 913 /* determine if any processor has disassembled, if so we must 914 also disassemble ourselfs, in order that we may reassemble. */ 915 /* 916 if nonzero structure of submatrix B cannot change then we know that 917 no processor disassembled thus we can skip this stuff 918 */ 919 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 920 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 921 if (mat->was_assembled && !other_disassembled) { 922 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 923 } 924 } 925 926 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 927 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 928 } 929 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 930 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 931 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 932 933 #if defined(PETSC_USE_INFO) 934 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 935 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 936 baij->ht_total_ct = 0; 937 baij->ht_insert_ct = 0; 938 } 939 #endif 940 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 941 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 942 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 943 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 944 } 945 946 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 947 baij->rowvalues = 0; 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 953 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 954 { 955 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 956 PetscErrorCode ierr; 957 PetscMPIInt size = baij->size,rank = baij->rank; 958 PetscInt bs = mat->rmap->bs; 959 PetscTruth iascii,isdraw; 960 PetscViewer sviewer; 961 PetscViewerFormat format; 962 963 PetscFunctionBegin; 964 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 965 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 966 if (iascii) { 967 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 968 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 969 MatInfo info; 970 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 971 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 972 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 973 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 974 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 977 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 979 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 980 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 981 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } else if (format == PETSC_VIEWER_ASCII_INFO) { 984 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 987 PetscFunctionReturn(0); 988 } 989 } 990 991 if (isdraw) { 992 PetscDraw draw; 993 PetscTruth isnull; 994 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 995 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 996 } 997 998 if (size == 1) { 999 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1000 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1001 } else { 1002 /* assemble the entire matrix onto first processor. */ 1003 Mat A; 1004 Mat_SeqBAIJ *Aloc; 1005 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1006 MatScalar *a; 1007 1008 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1009 /* Perhaps this should be the type of mat? */ 1010 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1011 if (!rank) { 1012 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1013 } else { 1014 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1015 } 1016 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1017 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1018 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1019 1020 /* copy over the A part */ 1021 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1022 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1023 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1024 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = (baij->cstartbs+aj[j])*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 /* copy over the B part */ 1037 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1038 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1039 for (i=0; i<mbs; i++) { 1040 rvals[0] = bs*(baij->rstartbs + i); 1041 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1042 for (j=ai[i]; j<ai[i+1]; j++) { 1043 col = baij->garray[aj[j]]*bs; 1044 for (k=0; k<bs; k++) { 1045 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1046 col++; a += bs; 1047 } 1048 } 1049 } 1050 ierr = PetscFree(rvals);CHKERRQ(ierr); 1051 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 /* 1054 Everyone has to call to draw the matrix since the graphics waits are 1055 synchronized across all processors that share the PetscDraw object 1056 */ 1057 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1058 if (!rank) { 1059 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1060 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1061 } 1062 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1063 ierr = MatDestroy(A);CHKERRQ(ierr); 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatView_MPIBAIJ" 1070 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1071 { 1072 PetscErrorCode ierr; 1073 PetscTruth iascii,isdraw,issocket,isbinary; 1074 1075 PetscFunctionBegin; 1076 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1077 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1078 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1079 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1080 if (iascii || isdraw || issocket || isbinary) { 1081 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1082 } else { 1083 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1090 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1091 { 1092 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 #if defined(PETSC_USE_LOG) 1097 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1098 #endif 1099 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1100 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1101 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1102 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1103 #if defined (PETSC_USE_CTABLE) 1104 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1105 #else 1106 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1107 #endif 1108 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1109 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1110 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1111 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1112 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1113 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1114 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1115 ierr = PetscFree(baij);CHKERRQ(ierr); 1116 1117 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1118 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1119 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1120 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1121 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatMult_MPIBAIJ" 1130 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1131 { 1132 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 PetscInt nt; 1135 1136 PetscFunctionBegin; 1137 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1138 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1139 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1140 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1141 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1143 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1150 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1151 { 1152 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 1155 PetscFunctionBegin; 1156 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1157 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1159 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1165 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1166 { 1167 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1168 PetscErrorCode ierr; 1169 PetscTruth merged; 1170 1171 PetscFunctionBegin; 1172 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1173 /* do nondiagonal part */ 1174 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1175 if (!merged) { 1176 /* send it on its way */ 1177 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1178 /* do local part */ 1179 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1180 /* receive remote parts: note this assumes the values are not actually */ 1181 /* inserted in yy until the next line */ 1182 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1183 } else { 1184 /* do local part */ 1185 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1186 /* send it on its way */ 1187 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1188 /* values actually were received in the Begin() but we need to call this nop */ 1189 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1196 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1197 { 1198 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* do nondiagonal part */ 1203 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1204 /* send it on its way */ 1205 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* receive remote parts: note this assumes the values are not actually */ 1209 /* inserted in yy until the next line, which is true for my implementation*/ 1210 /* but is not perhaps always true. */ 1211 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1212 PetscFunctionReturn(0); 1213 } 1214 1215 /* 1216 This only works correctly for square matrices where the subblock A->A is the 1217 diagonal block 1218 */ 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1221 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1222 { 1223 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 1226 PetscFunctionBegin; 1227 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1228 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatScale_MPIBAIJ" 1234 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1235 { 1236 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1241 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1247 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1248 { 1249 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1250 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1251 PetscErrorCode ierr; 1252 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1253 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1254 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1255 1256 PetscFunctionBegin; 1257 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1258 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1259 mat->getrowactive = PETSC_TRUE; 1260 1261 if (!mat->rowvalues && (idx || v)) { 1262 /* 1263 allocate enough space to hold information from the longest row. 1264 */ 1265 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1266 PetscInt max = 1,mbs = mat->mbs,tmp; 1267 for (i=0; i<mbs; i++) { 1268 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1269 if (max < tmp) { max = tmp; } 1270 } 1271 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1272 } 1273 lrow = row - brstart; 1274 1275 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1276 if (!v) {pvA = 0; pvB = 0;} 1277 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1278 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1279 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1280 nztot = nzA + nzB; 1281 1282 cmap = mat->garray; 1283 if (v || idx) { 1284 if (nztot) { 1285 /* Sort by increasing column numbers, assuming A and B already sorted */ 1286 PetscInt imark = -1; 1287 if (v) { 1288 *v = v_p = mat->rowvalues; 1289 for (i=0; i<nzB; i++) { 1290 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1291 else break; 1292 } 1293 imark = i; 1294 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1295 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1296 } 1297 if (idx) { 1298 *idx = idx_p = mat->rowindices; 1299 if (imark > -1) { 1300 for (i=0; i<imark; i++) { 1301 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1302 } 1303 } else { 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]/bs] < cstart) 1306 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1307 else break; 1308 } 1309 imark = i; 1310 } 1311 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1312 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1313 } 1314 } else { 1315 if (idx) *idx = 0; 1316 if (v) *v = 0; 1317 } 1318 } 1319 *nz = nztot; 1320 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1321 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1327 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1328 { 1329 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1330 1331 PetscFunctionBegin; 1332 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1333 baij->getrowactive = PETSC_FALSE; 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1339 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1340 { 1341 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1346 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1352 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1353 { 1354 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1355 Mat A = a->A,B = a->B; 1356 PetscErrorCode ierr; 1357 PetscReal isend[5],irecv[5]; 1358 1359 PetscFunctionBegin; 1360 info->block_size = (PetscReal)matin->rmap->bs; 1361 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1362 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1363 isend[3] = info->memory; isend[4] = info->mallocs; 1364 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1365 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1366 isend[3] += info->memory; isend[4] += info->mallocs; 1367 if (flag == MAT_LOCAL) { 1368 info->nz_used = isend[0]; 1369 info->nz_allocated = isend[1]; 1370 info->nz_unneeded = isend[2]; 1371 info->memory = isend[3]; 1372 info->mallocs = isend[4]; 1373 } else if (flag == MAT_GLOBAL_MAX) { 1374 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1375 info->nz_used = irecv[0]; 1376 info->nz_allocated = irecv[1]; 1377 info->nz_unneeded = irecv[2]; 1378 info->memory = irecv[3]; 1379 info->mallocs = irecv[4]; 1380 } else if (flag == MAT_GLOBAL_SUM) { 1381 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1382 info->nz_used = irecv[0]; 1383 info->nz_allocated = irecv[1]; 1384 info->nz_unneeded = irecv[2]; 1385 info->memory = irecv[3]; 1386 info->mallocs = irecv[4]; 1387 } else { 1388 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1389 } 1390 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1391 info->fill_ratio_needed = 0; 1392 info->factor_mallocs = 0; 1393 PetscFunctionReturn(0); 1394 } 1395 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1398 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1399 { 1400 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 switch (op) { 1405 case MAT_NEW_NONZERO_LOCATIONS: 1406 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1407 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1408 case MAT_KEEP_NONZERO_PATTERN: 1409 case MAT_NEW_NONZERO_LOCATION_ERR: 1410 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1411 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1412 break; 1413 case MAT_ROW_ORIENTED: 1414 a->roworiented = flg; 1415 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1416 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1417 break; 1418 case MAT_NEW_DIAGONALS: 1419 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1420 break; 1421 case MAT_IGNORE_OFF_PROC_ENTRIES: 1422 a->donotstash = flg; 1423 break; 1424 case MAT_USE_HASH_TABLE: 1425 a->ht_flag = flg; 1426 break; 1427 case MAT_SYMMETRIC: 1428 case MAT_STRUCTURALLY_SYMMETRIC: 1429 case MAT_HERMITIAN: 1430 case MAT_SYMMETRY_ETERNAL: 1431 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1432 break; 1433 default: 1434 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1435 } 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1441 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1442 { 1443 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1444 Mat_SeqBAIJ *Aloc; 1445 Mat B; 1446 PetscErrorCode ierr; 1447 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1448 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1449 MatScalar *a; 1450 1451 PetscFunctionBegin; 1452 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1453 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1454 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1455 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1456 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1457 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1458 } else { 1459 B = *matout; 1460 } 1461 1462 /* copy over the A part */ 1463 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1464 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1465 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1466 1467 for (i=0; i<mbs; i++) { 1468 rvals[0] = bs*(baij->rstartbs + i); 1469 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1470 for (j=ai[i]; j<ai[i+1]; j++) { 1471 col = (baij->cstartbs+aj[j])*bs; 1472 for (k=0; k<bs; k++) { 1473 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1474 col++; a += bs; 1475 } 1476 } 1477 } 1478 /* copy over the B part */ 1479 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1480 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1481 for (i=0; i<mbs; i++) { 1482 rvals[0] = bs*(baij->rstartbs + i); 1483 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1484 for (j=ai[i]; j<ai[i+1]; j++) { 1485 col = baij->garray[aj[j]]*bs; 1486 for (k=0; k<bs; k++) { 1487 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1488 col++; a += bs; 1489 } 1490 } 1491 } 1492 ierr = PetscFree(rvals);CHKERRQ(ierr); 1493 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1494 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1495 1496 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1497 *matout = B; 1498 } else { 1499 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1506 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1507 { 1508 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1509 Mat a = baij->A,b = baij->B; 1510 PetscErrorCode ierr; 1511 PetscInt s1,s2,s3; 1512 1513 PetscFunctionBegin; 1514 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1515 if (rr) { 1516 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1517 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1518 /* Overlap communication with computation. */ 1519 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 } 1521 if (ll) { 1522 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1523 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1524 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1525 } 1526 /* scale the diagonal block */ 1527 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1528 1529 if (rr) { 1530 /* Do a scatter end and then right scale the off-diagonal block */ 1531 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1532 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1533 } 1534 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1540 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1541 { 1542 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1543 PetscErrorCode ierr; 1544 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1545 PetscInt i,*owners = A->rmap->range; 1546 PetscInt *nprocs,j,idx,nsends,row; 1547 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1548 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1549 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1550 MPI_Comm comm = ((PetscObject)A)->comm; 1551 MPI_Request *send_waits,*recv_waits; 1552 MPI_Status recv_status,*send_status; 1553 #if defined(PETSC_DEBUG) 1554 PetscTruth found = PETSC_FALSE; 1555 #endif 1556 1557 PetscFunctionBegin; 1558 /* first count number of contributors to each processor */ 1559 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1560 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1561 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1562 j = 0; 1563 for (i=0; i<N; i++) { 1564 if (lastidx > (idx = rows[i])) j = 0; 1565 lastidx = idx; 1566 for (; j<size; j++) { 1567 if (idx >= owners[j] && idx < owners[j+1]) { 1568 nprocs[2*j]++; 1569 nprocs[2*j+1] = 1; 1570 owner[i] = j; 1571 #if defined(PETSC_DEBUG) 1572 found = PETSC_TRUE; 1573 #endif 1574 break; 1575 } 1576 } 1577 #if defined(PETSC_DEBUG) 1578 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1579 found = PETSC_FALSE; 1580 #endif 1581 } 1582 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1583 1584 /* inform other processors of number of messages and max length*/ 1585 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1586 1587 /* post receives: */ 1588 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1589 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1590 for (i=0; i<nrecvs; i++) { 1591 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1592 } 1593 1594 /* do sends: 1595 1) starts[i] gives the starting index in svalues for stuff going to 1596 the ith processor 1597 */ 1598 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1599 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1600 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1601 starts[0] = 0; 1602 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1603 for (i=0; i<N; i++) { 1604 svalues[starts[owner[i]]++] = rows[i]; 1605 } 1606 1607 starts[0] = 0; 1608 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1609 count = 0; 1610 for (i=0; i<size; i++) { 1611 if (nprocs[2*i+1]) { 1612 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1613 } 1614 } 1615 ierr = PetscFree(starts);CHKERRQ(ierr); 1616 1617 base = owners[rank]; 1618 1619 /* wait on receives */ 1620 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1621 count = nrecvs; 1622 slen = 0; 1623 while (count) { 1624 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1625 /* unpack receives into our local space */ 1626 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1627 source[imdex] = recv_status.MPI_SOURCE; 1628 lens[imdex] = n; 1629 slen += n; 1630 count--; 1631 } 1632 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1633 1634 /* move the data into the send scatter */ 1635 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1636 count = 0; 1637 for (i=0; i<nrecvs; i++) { 1638 values = rvalues + i*nmax; 1639 for (j=0; j<lens[i]; j++) { 1640 lrows[count++] = values[j] - base; 1641 } 1642 } 1643 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1644 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1645 ierr = PetscFree(owner);CHKERRQ(ierr); 1646 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1647 1648 /* actually zap the local rows */ 1649 /* 1650 Zero the required rows. If the "diagonal block" of the matrix 1651 is square and the user wishes to set the diagonal we use separate 1652 code so that MatSetValues() is not called for each diagonal allocating 1653 new memory, thus calling lots of mallocs and slowing things down. 1654 1655 */ 1656 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1657 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1658 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1659 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1660 } else if (diag != 0.0) { 1661 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1662 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1663 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1664 for (i=0; i<slen; i++) { 1665 row = lrows[i] + rstart_bs; 1666 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1667 } 1668 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 } else { 1671 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1672 } 1673 1674 ierr = PetscFree(lrows);CHKERRQ(ierr); 1675 1676 /* wait on sends */ 1677 if (nsends) { 1678 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1679 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1680 ierr = PetscFree(send_status);CHKERRQ(ierr); 1681 } 1682 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1683 ierr = PetscFree(svalues);CHKERRQ(ierr); 1684 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1690 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1691 { 1692 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 1700 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatEqual_MPIBAIJ" 1704 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1705 { 1706 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1707 Mat a,b,c,d; 1708 PetscTruth flg; 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 a = matA->A; b = matA->B; 1713 c = matB->A; d = matB->B; 1714 1715 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1716 if (flg) { 1717 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1718 } 1719 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatCopy_MPIBAIJ" 1725 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1726 { 1727 PetscErrorCode ierr; 1728 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1729 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1730 1731 PetscFunctionBegin; 1732 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1733 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1734 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1735 } else { 1736 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1737 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1738 } 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1744 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1745 { 1746 PetscErrorCode ierr; 1747 1748 PetscFunctionBegin; 1749 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1755 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1756 { 1757 PetscErrorCode ierr; 1758 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1759 PetscBLASInt bnz,one=1; 1760 Mat_SeqBAIJ *x,*y; 1761 1762 PetscFunctionBegin; 1763 if (str == SAME_NONZERO_PATTERN) { 1764 PetscScalar alpha = a; 1765 x = (Mat_SeqBAIJ *)xx->A->data; 1766 y = (Mat_SeqBAIJ *)yy->A->data; 1767 bnz = PetscBLASIntCast(x->nz); 1768 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1769 x = (Mat_SeqBAIJ *)xx->B->data; 1770 y = (Mat_SeqBAIJ *)yy->B->data; 1771 bnz = PetscBLASIntCast(x->nz); 1772 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1773 } else { 1774 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1781 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1782 { 1783 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1784 PetscInt rbs,cbs; 1785 PetscErrorCode ierr; 1786 1787 PetscFunctionBegin; 1788 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1789 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1790 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1791 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1792 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1793 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1794 PetscFunctionReturn(0); 1795 } 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1799 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1800 { 1801 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1806 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1812 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1813 { 1814 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1815 PetscErrorCode ierr; 1816 1817 PetscFunctionBegin; 1818 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1819 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1825 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1826 { 1827 PetscErrorCode ierr; 1828 IS iscol_local; 1829 PetscInt csize; 1830 1831 PetscFunctionBegin; 1832 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1833 if (call == MAT_REUSE_MATRIX) { 1834 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1835 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1836 } else { 1837 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1838 } 1839 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1840 if (call == MAT_INITIAL_MATRIX) { 1841 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1842 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1843 } 1844 PetscFunctionReturn(0); 1845 } 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1849 /* 1850 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1851 in local and then by concatenating the local matrices the end result. 1852 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1853 */ 1854 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1855 { 1856 PetscErrorCode ierr; 1857 PetscMPIInt rank,size; 1858 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1859 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1860 Mat *local,M,Mreuse; 1861 MatScalar *vwork,*aa; 1862 MPI_Comm comm = ((PetscObject)mat)->comm; 1863 Mat_SeqBAIJ *aij; 1864 1865 1866 PetscFunctionBegin; 1867 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1868 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1869 1870 if (call == MAT_REUSE_MATRIX) { 1871 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1872 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1873 local = &Mreuse; 1874 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1875 } else { 1876 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1877 Mreuse = *local; 1878 ierr = PetscFree(local);CHKERRQ(ierr); 1879 } 1880 1881 /* 1882 m - number of local rows 1883 n - number of columns (same on all processors) 1884 rstart - first row in new global matrix generated 1885 */ 1886 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1887 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1888 m = m/bs; 1889 n = n/bs; 1890 1891 if (call == MAT_INITIAL_MATRIX) { 1892 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1893 ii = aij->i; 1894 jj = aij->j; 1895 1896 /* 1897 Determine the number of non-zeros in the diagonal and off-diagonal 1898 portions of the matrix in order to do correct preallocation 1899 */ 1900 1901 /* first get start and end of "diagonal" columns */ 1902 if (csize == PETSC_DECIDE) { 1903 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1904 if (mglobal == n*bs) { /* square matrix */ 1905 nlocal = m; 1906 } else { 1907 nlocal = n/size + ((n % size) > rank); 1908 } 1909 } else { 1910 nlocal = csize/bs; 1911 } 1912 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1913 rstart = rend - nlocal; 1914 if (rank == size - 1 && rend != n) { 1915 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1916 } 1917 1918 /* next, compute all the lengths */ 1919 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1920 olens = dlens + m; 1921 for (i=0; i<m; i++) { 1922 jend = ii[i+1] - ii[i]; 1923 olen = 0; 1924 dlen = 0; 1925 for (j=0; j<jend; j++) { 1926 if (*jj < rstart || *jj >= rend) olen++; 1927 else dlen++; 1928 jj++; 1929 } 1930 olens[i] = olen; 1931 dlens[i] = dlen; 1932 } 1933 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1934 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1935 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1936 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1937 ierr = PetscFree(dlens);CHKERRQ(ierr); 1938 } else { 1939 PetscInt ml,nl; 1940 1941 M = *newmat; 1942 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1943 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1944 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1945 /* 1946 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1947 rather than the slower MatSetValues(). 1948 */ 1949 M->was_assembled = PETSC_TRUE; 1950 M->assembled = PETSC_FALSE; 1951 } 1952 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1953 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1954 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1955 ii = aij->i; 1956 jj = aij->j; 1957 aa = aij->a; 1958 for (i=0; i<m; i++) { 1959 row = rstart/bs + i; 1960 nz = ii[i+1] - ii[i]; 1961 cwork = jj; jj += nz; 1962 vwork = aa; aa += nz; 1963 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1964 } 1965 1966 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1968 *newmat = M; 1969 1970 /* save submatrix used in processor for next request */ 1971 if (call == MAT_INITIAL_MATRIX) { 1972 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1973 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1974 } 1975 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatPermute_MPIBAIJ" 1981 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1982 { 1983 MPI_Comm comm,pcomm; 1984 PetscInt first,local_size,nrows; 1985 const PetscInt *rows; 1986 PetscMPIInt size; 1987 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1988 PetscErrorCode ierr; 1989 1990 PetscFunctionBegin; 1991 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1992 /* make a collective version of 'rowp' */ 1993 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1994 if (pcomm==comm) { 1995 crowp = rowp; 1996 } else { 1997 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1998 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1999 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2000 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2001 } 2002 /* collect the global row permutation and invert it */ 2003 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2004 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2005 if (pcomm!=comm) { 2006 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2007 } 2008 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2009 /* get the local target indices */ 2010 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2011 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2012 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2013 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2014 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2015 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2016 /* the column permutation is so much easier; 2017 make a local version of 'colp' and invert it */ 2018 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2019 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2020 if (size==1) { 2021 lcolp = colp; 2022 } else { 2023 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2024 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2025 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2026 } 2027 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2028 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2029 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2030 if (size>1) { 2031 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2032 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2033 } 2034 /* now we just get the submatrix */ 2035 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2036 /* clean up */ 2037 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2038 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2044 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2045 { 2046 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2047 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2048 2049 PetscFunctionBegin; 2050 if (nghosts) { *nghosts = B->nbs;} 2051 if (ghosts) {*ghosts = baij->garray;} 2052 PetscFunctionReturn(0); 2053 } 2054 2055 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2056 2057 #undef __FUNCT__ 2058 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2059 /* 2060 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2061 */ 2062 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2063 { 2064 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2065 PetscErrorCode ierr; 2066 PetscMPIInt size,*ncolsonproc,*disp,nn; 2067 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2068 const PetscInt *is; 2069 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2070 PetscInt *rowhit,M,cstart,cend,colb; 2071 PetscInt *columnsforrow,l; 2072 IS *isa; 2073 PetscTruth done,flg; 2074 ISLocalToGlobalMapping map = mat->bmapping; 2075 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2076 2077 PetscFunctionBegin; 2078 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2079 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2080 2081 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2082 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2083 M = mat->rmap->n/bs; 2084 cstart = mat->cmap->rstart/bs; 2085 cend = mat->cmap->rend/bs; 2086 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2087 c->N = mat->cmap->N/bs; 2088 c->m = mat->rmap->n/bs; 2089 c->rstart = mat->rmap->rstart/bs; 2090 2091 c->ncolors = nis; 2092 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2093 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2094 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2095 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2096 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2097 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2098 2099 /* Allow access to data structures of local part of matrix */ 2100 if (!baij->colmap) { 2101 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2102 } 2103 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2104 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2105 2106 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2107 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2108 2109 for (i=0; i<nis; i++) { 2110 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2111 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2112 c->ncolumns[i] = n; 2113 if (n) { 2114 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2115 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2116 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2117 } else { 2118 c->columns[i] = 0; 2119 } 2120 2121 if (ctype == IS_COLORING_GLOBAL){ 2122 /* Determine the total (parallel) number of columns of this color */ 2123 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2124 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2125 2126 nn = PetscMPIIntCast(n); 2127 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2128 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2129 if (!nctot) { 2130 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2131 } 2132 2133 disp[0] = 0; 2134 for (j=1; j<size; j++) { 2135 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2136 } 2137 2138 /* Get complete list of columns for color on each processor */ 2139 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2140 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2141 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2142 } else if (ctype == IS_COLORING_GHOSTED){ 2143 /* Determine local number of columns of this color on this process, including ghost points */ 2144 nctot = n; 2145 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2146 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2147 } else { 2148 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2149 } 2150 2151 /* 2152 Mark all rows affect by these columns 2153 */ 2154 /* Temporary option to allow for debugging/testing */ 2155 flg = PETSC_FALSE; 2156 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2157 if (!flg) {/*-----------------------------------------------------------------------------*/ 2158 /* crude, fast version */ 2159 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2160 /* loop over columns*/ 2161 for (j=0; j<nctot; j++) { 2162 if (ctype == IS_COLORING_GHOSTED) { 2163 col = ltog[cols[j]]; 2164 } else { 2165 col = cols[j]; 2166 } 2167 if (col >= cstart && col < cend) { 2168 /* column is in diagonal block of matrix */ 2169 rows = A_cj + A_ci[col-cstart]; 2170 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2171 } else { 2172 #if defined (PETSC_USE_CTABLE) 2173 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr) 2174 colb --; 2175 #else 2176 colb = baij->colmap[col] - 1; 2177 #endif 2178 if (colb == -1) { 2179 m = 0; 2180 } else { 2181 colb = colb/bs; 2182 rows = B_cj + B_ci[colb]; 2183 m = B_ci[colb+1] - B_ci[colb]; 2184 } 2185 } 2186 /* loop over columns marking them in rowhit */ 2187 for (k=0; k<m; k++) { 2188 rowhit[*rows++] = col + 1; 2189 } 2190 } 2191 2192 /* count the number of hits */ 2193 nrows = 0; 2194 for (j=0; j<M; j++) { 2195 if (rowhit[j]) nrows++; 2196 } 2197 c->nrows[i] = nrows; 2198 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2199 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2200 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2201 nrows = 0; 2202 for (j=0; j<M; j++) { 2203 if (rowhit[j]) { 2204 c->rows[i][nrows] = j; 2205 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2206 nrows++; 2207 } 2208 } 2209 } else {/*-------------------------------------------------------------------------------*/ 2210 /* slow version, using rowhit as a linked list */ 2211 PetscInt currentcol,fm,mfm; 2212 rowhit[M] = M; 2213 nrows = 0; 2214 /* loop over columns*/ 2215 for (j=0; j<nctot; j++) { 2216 if (ctype == IS_COLORING_GHOSTED) { 2217 col = ltog[cols[j]]; 2218 } else { 2219 col = cols[j]; 2220 } 2221 if (col >= cstart && col < cend) { 2222 /* column is in diagonal block of matrix */ 2223 rows = A_cj + A_ci[col-cstart]; 2224 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2225 } else { 2226 #if defined (PETSC_USE_CTABLE) 2227 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2228 colb --; 2229 #else 2230 colb = baij->colmap[col] - 1; 2231 #endif 2232 if (colb == -1) { 2233 m = 0; 2234 } else { 2235 colb = colb/bs; 2236 rows = B_cj + B_ci[colb]; 2237 m = B_ci[colb+1] - B_ci[colb]; 2238 } 2239 } 2240 2241 /* loop over columns marking them in rowhit */ 2242 fm = M; /* fm points to first entry in linked list */ 2243 for (k=0; k<m; k++) { 2244 currentcol = *rows++; 2245 /* is it already in the list? */ 2246 do { 2247 mfm = fm; 2248 fm = rowhit[fm]; 2249 } while (fm < currentcol); 2250 /* not in list so add it */ 2251 if (fm != currentcol) { 2252 nrows++; 2253 columnsforrow[currentcol] = col; 2254 /* next three lines insert new entry into linked list */ 2255 rowhit[mfm] = currentcol; 2256 rowhit[currentcol] = fm; 2257 fm = currentcol; 2258 /* fm points to present position in list since we know the columns are sorted */ 2259 } else { 2260 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2261 } 2262 } 2263 } 2264 c->nrows[i] = nrows; 2265 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2266 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2267 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2268 /* now store the linked list of rows into c->rows[i] */ 2269 nrows = 0; 2270 fm = rowhit[M]; 2271 do { 2272 c->rows[i][nrows] = fm; 2273 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2274 fm = rowhit[fm]; 2275 } while (fm < M); 2276 } /* ---------------------------------------------------------------------------------------*/ 2277 ierr = PetscFree(cols);CHKERRQ(ierr); 2278 } 2279 2280 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2281 /* 2282 vscale will contain the "diagonal" on processor scalings followed by the off processor 2283 */ 2284 if (ctype == IS_COLORING_GLOBAL) { 2285 PetscInt *garray; 2286 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2287 for (i=0; i<baij->B->cmap->n/bs; i++) { 2288 for (j=0; j<bs; j++) { 2289 garray[i*bs+j] = bs*baij->garray[i]+j; 2290 } 2291 } 2292 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2293 ierr = PetscFree(garray);CHKERRQ(ierr); 2294 CHKMEMQ; 2295 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2296 for (k=0; k<c->ncolors; k++) { 2297 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2298 for (l=0; l<c->nrows[k]; l++) { 2299 col = c->columnsforrow[k][l]; 2300 if (col >= cstart && col < cend) { 2301 /* column is in diagonal block of matrix */ 2302 colb = col - cstart; 2303 } else { 2304 /* column is in "off-processor" part */ 2305 #if defined (PETSC_USE_CTABLE) 2306 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2307 colb --; 2308 #else 2309 colb = baij->colmap[col] - 1; 2310 #endif 2311 colb = colb/bs; 2312 colb += cend - cstart; 2313 } 2314 c->vscaleforrow[k][l] = colb; 2315 } 2316 } 2317 } else if (ctype == IS_COLORING_GHOSTED) { 2318 /* Get gtol mapping */ 2319 PetscInt N = mat->cmap->N, *gtol; 2320 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2321 for (i=0; i<N; i++) gtol[i] = -1; 2322 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2323 2324 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2325 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2326 for (k=0; k<c->ncolors; k++) { 2327 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2328 for (l=0; l<c->nrows[k]; l++) { 2329 col = c->columnsforrow[k][l]; /* global column index */ 2330 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2331 } 2332 } 2333 ierr = PetscFree(gtol);CHKERRQ(ierr); 2334 } 2335 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2336 2337 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2338 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2339 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2340 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2341 CHKMEMQ; 2342 PetscFunctionReturn(0); 2343 } 2344 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2347 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2348 { 2349 Mat B; 2350 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2351 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2352 Mat_SeqAIJ *b; 2353 PetscErrorCode ierr; 2354 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2355 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2356 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2357 2358 PetscFunctionBegin; 2359 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2360 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2361 2362 /* ---------------------------------------------------------------- 2363 Tell every processor the number of nonzeros per row 2364 */ 2365 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2366 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2367 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2368 } 2369 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2370 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2371 displs = recvcounts + size; 2372 for (i=0; i<size; i++) { 2373 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2374 displs[i] = A->rmap->range[i]/bs; 2375 } 2376 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2377 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2378 #else 2379 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2380 #endif 2381 /* --------------------------------------------------------------- 2382 Create the sequential matrix of the same type as the local block diagonal 2383 */ 2384 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2385 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2386 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2387 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2388 b = (Mat_SeqAIJ *)B->data; 2389 2390 /*-------------------------------------------------------------------- 2391 Copy my part of matrix column indices over 2392 */ 2393 sendcount = ad->nz + bd->nz; 2394 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2395 a_jsendbuf = ad->j; 2396 b_jsendbuf = bd->j; 2397 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2398 cnt = 0; 2399 for (i=0; i<n; i++) { 2400 2401 /* put in lower diagonal portion */ 2402 m = bd->i[i+1] - bd->i[i]; 2403 while (m > 0) { 2404 /* is it above diagonal (in bd (compressed) numbering) */ 2405 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2406 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2407 m--; 2408 } 2409 2410 /* put in diagonal portion */ 2411 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2412 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2413 } 2414 2415 /* put in upper diagonal portion */ 2416 while (m-- > 0) { 2417 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2418 } 2419 } 2420 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2421 2422 /*-------------------------------------------------------------------- 2423 Gather all column indices to all processors 2424 */ 2425 for (i=0; i<size; i++) { 2426 recvcounts[i] = 0; 2427 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2428 recvcounts[i] += lens[j]; 2429 } 2430 } 2431 displs[0] = 0; 2432 for (i=1; i<size; i++) { 2433 displs[i] = displs[i-1] + recvcounts[i-1]; 2434 } 2435 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2436 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2437 #else 2438 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2439 #endif 2440 /*-------------------------------------------------------------------- 2441 Assemble the matrix into useable form (note numerical values not yet set) 2442 */ 2443 /* set the b->ilen (length of each row) values */ 2444 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2445 /* set the b->i indices */ 2446 b->i[0] = 0; 2447 for (i=1; i<=A->rmap->N/bs; i++) { 2448 b->i[i] = b->i[i-1] + lens[i-1]; 2449 } 2450 ierr = PetscFree(lens);CHKERRQ(ierr); 2451 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2452 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2453 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2454 2455 if (A->symmetric){ 2456 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2457 } else if (A->hermitian) { 2458 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2459 } else if (A->structurally_symmetric) { 2460 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2461 } 2462 *newmat = B; 2463 PetscFunctionReturn(0); 2464 } 2465 2466 #undef __FUNCT__ 2467 #define __FUNCT__ "MatSOR_MPIBAIJ" 2468 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2469 { 2470 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2471 PetscErrorCode ierr; 2472 Vec bb1 = 0; 2473 2474 PetscFunctionBegin; 2475 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2476 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2477 } 2478 2479 if (flag == SOR_APPLY_UPPER) { 2480 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2481 PetscFunctionReturn(0); 2482 } 2483 2484 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2485 if (flag & SOR_ZERO_INITIAL_GUESS) { 2486 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2487 its--; 2488 } 2489 2490 while (its--) { 2491 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2492 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2493 2494 /* update rhs: bb1 = bb - B*x */ 2495 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2496 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2497 2498 /* local sweep */ 2499 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2500 } 2501 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2502 if (flag & SOR_ZERO_INITIAL_GUESS) { 2503 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2504 its--; 2505 } 2506 while (its--) { 2507 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2508 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2509 2510 /* update rhs: bb1 = bb - B*x */ 2511 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2512 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2513 2514 /* local sweep */ 2515 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2516 } 2517 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2518 if (flag & SOR_ZERO_INITIAL_GUESS) { 2519 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2520 its--; 2521 } 2522 while (its--) { 2523 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2524 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2525 2526 /* update rhs: bb1 = bb - B*x */ 2527 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2528 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2529 2530 /* local sweep */ 2531 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2532 } 2533 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2534 2535 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2536 PetscFunctionReturn(0); 2537 } 2538 2539 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2540 2541 2542 /* -------------------------------------------------------------------*/ 2543 static struct _MatOps MatOps_Values = { 2544 MatSetValues_MPIBAIJ, 2545 MatGetRow_MPIBAIJ, 2546 MatRestoreRow_MPIBAIJ, 2547 MatMult_MPIBAIJ, 2548 /* 4*/ MatMultAdd_MPIBAIJ, 2549 MatMultTranspose_MPIBAIJ, 2550 MatMultTransposeAdd_MPIBAIJ, 2551 0, 2552 0, 2553 0, 2554 /*10*/ 0, 2555 0, 2556 0, 2557 MatSOR_MPIBAIJ, 2558 MatTranspose_MPIBAIJ, 2559 /*15*/ MatGetInfo_MPIBAIJ, 2560 MatEqual_MPIBAIJ, 2561 MatGetDiagonal_MPIBAIJ, 2562 MatDiagonalScale_MPIBAIJ, 2563 MatNorm_MPIBAIJ, 2564 /*20*/ MatAssemblyBegin_MPIBAIJ, 2565 MatAssemblyEnd_MPIBAIJ, 2566 MatSetOption_MPIBAIJ, 2567 MatZeroEntries_MPIBAIJ, 2568 /*24*/ MatZeroRows_MPIBAIJ, 2569 0, 2570 0, 2571 0, 2572 0, 2573 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2574 0, 2575 0, 2576 0, 2577 0, 2578 /*34*/ MatDuplicate_MPIBAIJ, 2579 0, 2580 0, 2581 0, 2582 0, 2583 /*39*/ MatAXPY_MPIBAIJ, 2584 MatGetSubMatrices_MPIBAIJ, 2585 MatIncreaseOverlap_MPIBAIJ, 2586 MatGetValues_MPIBAIJ, 2587 MatCopy_MPIBAIJ, 2588 /*44*/ 0, 2589 MatScale_MPIBAIJ, 2590 0, 2591 0, 2592 0, 2593 /*49*/ MatSetBlockSize_MPIBAIJ, 2594 0, 2595 0, 2596 0, 2597 0, 2598 /*54*/ MatFDColoringCreate_MPIBAIJ, 2599 0, 2600 MatSetUnfactored_MPIBAIJ, 2601 MatPermute_MPIBAIJ, 2602 MatSetValuesBlocked_MPIBAIJ, 2603 /*59*/ MatGetSubMatrix_MPIBAIJ, 2604 MatDestroy_MPIBAIJ, 2605 MatView_MPIBAIJ, 2606 0, 2607 0, 2608 /*64*/ 0, 2609 0, 2610 0, 2611 0, 2612 0, 2613 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*74*/ 0, 2619 MatFDColoringApply_BAIJ, 2620 0, 2621 0, 2622 0, 2623 /*79*/ 0, 2624 0, 2625 0, 2626 0, 2627 MatLoad_MPIBAIJ, 2628 /*84*/ 0, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /*89*/ 0, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /*94*/ 0, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*99*/ 0, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*104*/0, 2649 MatRealPart_MPIBAIJ, 2650 MatImaginaryPart_MPIBAIJ, 2651 0, 2652 0, 2653 /*109*/0, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2659 0, 2660 MatGetGhosts_MPIBAIJ 2661 }; 2662 2663 EXTERN_C_BEGIN 2664 #undef __FUNCT__ 2665 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2666 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2667 { 2668 PetscFunctionBegin; 2669 *a = ((Mat_MPIBAIJ *)A->data)->A; 2670 *iscopy = PETSC_FALSE; 2671 PetscFunctionReturn(0); 2672 } 2673 EXTERN_C_END 2674 2675 EXTERN_C_BEGIN 2676 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2677 EXTERN_C_END 2678 2679 EXTERN_C_BEGIN 2680 #undef __FUNCT__ 2681 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2682 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2683 { 2684 PetscInt m,rstart,cstart,cend; 2685 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2686 const PetscInt *JJ=0; 2687 PetscScalar *values=0; 2688 PetscErrorCode ierr; 2689 2690 PetscFunctionBegin; 2691 2692 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2693 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2694 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2695 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2696 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2697 m = B->rmap->n/bs; 2698 rstart = B->rmap->rstart/bs; 2699 cstart = B->cmap->rstart/bs; 2700 cend = B->cmap->rend/bs; 2701 2702 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2703 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2704 for (i=0; i<m; i++) { 2705 nz = ii[i+1] - ii[i]; 2706 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2707 nz_max = PetscMax(nz_max,nz); 2708 JJ = jj + ii[i]; 2709 for (j=0; j<nz; j++) { 2710 if (*JJ >= cstart) break; 2711 JJ++; 2712 } 2713 d = 0; 2714 for (; j<nz; j++) { 2715 if (*JJ++ >= cend) break; 2716 d++; 2717 } 2718 d_nnz[i] = d; 2719 o_nnz[i] = nz - d; 2720 } 2721 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2722 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2723 2724 values = (PetscScalar*)V; 2725 if (!values) { 2726 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2727 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2728 } 2729 for (i=0; i<m; i++) { 2730 PetscInt row = i + rstart; 2731 PetscInt ncols = ii[i+1] - ii[i]; 2732 const PetscInt *icols = jj + ii[i]; 2733 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2734 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2735 } 2736 2737 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2738 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2739 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2740 2741 PetscFunctionReturn(0); 2742 } 2743 EXTERN_C_END 2744 2745 #undef __FUNCT__ 2746 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2747 /*@C 2748 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2749 (the default parallel PETSc format). 2750 2751 Collective on MPI_Comm 2752 2753 Input Parameters: 2754 + A - the matrix 2755 . i - the indices into j for the start of each local row (starts with zero) 2756 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2757 - v - optional values in the matrix 2758 2759 Level: developer 2760 2761 .keywords: matrix, aij, compressed row, sparse, parallel 2762 2763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2764 @*/ 2765 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2766 { 2767 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2768 2769 PetscFunctionBegin; 2770 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2771 if (f) { 2772 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2773 } 2774 PetscFunctionReturn(0); 2775 } 2776 2777 EXTERN_C_BEGIN 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2780 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2781 { 2782 Mat_MPIBAIJ *b; 2783 PetscErrorCode ierr; 2784 PetscInt i, newbs = PetscAbs(bs); 2785 2786 PetscFunctionBegin; 2787 if (bs < 0) { 2788 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2789 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2790 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2791 bs = PetscAbs(bs); 2792 } 2793 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2794 bs = newbs; 2795 2796 2797 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2798 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2799 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2800 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2801 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2802 2803 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2804 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2805 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2806 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2807 2808 if (d_nnz) { 2809 for (i=0; i<B->rmap->n/bs; i++) { 2810 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2811 } 2812 } 2813 if (o_nnz) { 2814 for (i=0; i<B->rmap->n/bs; i++) { 2815 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2816 } 2817 } 2818 2819 b = (Mat_MPIBAIJ*)B->data; 2820 b->bs2 = bs*bs; 2821 b->mbs = B->rmap->n/bs; 2822 b->nbs = B->cmap->n/bs; 2823 b->Mbs = B->rmap->N/bs; 2824 b->Nbs = B->cmap->N/bs; 2825 2826 for (i=0; i<=b->size; i++) { 2827 b->rangebs[i] = B->rmap->range[i]/bs; 2828 } 2829 b->rstartbs = B->rmap->rstart/bs; 2830 b->rendbs = B->rmap->rend/bs; 2831 b->cstartbs = B->cmap->rstart/bs; 2832 b->cendbs = B->cmap->rend/bs; 2833 2834 if (!B->preallocated) { 2835 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2836 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2837 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2838 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2839 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2840 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2841 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2842 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2843 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2844 } 2845 2846 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2847 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2848 B->preallocated = PETSC_TRUE; 2849 PetscFunctionReturn(0); 2850 } 2851 EXTERN_C_END 2852 2853 EXTERN_C_BEGIN 2854 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2855 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2856 EXTERN_C_END 2857 2858 2859 EXTERN_C_BEGIN 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2862 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2863 { 2864 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2865 PetscErrorCode ierr; 2866 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2867 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2868 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2869 2870 PetscFunctionBegin; 2871 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2872 ii[0] = 0; 2873 CHKMEMQ; 2874 for (i=0; i<M; i++) { 2875 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2876 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2877 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2878 /* remove one from count of matrix has diagonal */ 2879 for (j=id[i]; j<id[i+1]; j++) { 2880 if (jd[j] == i) {ii[i+1]--;break;} 2881 } 2882 CHKMEMQ; 2883 } 2884 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2885 cnt = 0; 2886 for (i=0; i<M; i++) { 2887 for (j=io[i]; j<io[i+1]; j++) { 2888 if (garray[jo[j]] > rstart) break; 2889 jj[cnt++] = garray[jo[j]]; 2890 CHKMEMQ; 2891 } 2892 for (k=id[i]; k<id[i+1]; k++) { 2893 if (jd[k] != i) { 2894 jj[cnt++] = rstart + jd[k]; 2895 CHKMEMQ; 2896 } 2897 } 2898 for (;j<io[i+1]; j++) { 2899 jj[cnt++] = garray[jo[j]]; 2900 CHKMEMQ; 2901 } 2902 } 2903 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2904 PetscFunctionReturn(0); 2905 } 2906 EXTERN_C_END 2907 2908 EXTERN_C_BEGIN 2909 #if defined(PETSC_HAVE_MUMPS) 2910 extern PetscErrorCode MatGetFactor_mpibaij_mumps(Mat,MatFactorType,Mat*); 2911 #endif 2912 EXTERN_C_END 2913 2914 /*MC 2915 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2916 2917 Options Database Keys: 2918 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2919 . -mat_block_size <bs> - set the blocksize used to store the matrix 2920 - -mat_use_hash_table <fact> 2921 2922 Level: beginner 2923 2924 .seealso: MatCreateMPIBAIJ 2925 M*/ 2926 2927 EXTERN_C_BEGIN 2928 #undef __FUNCT__ 2929 #define __FUNCT__ "MatCreate_MPIBAIJ" 2930 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2931 { 2932 Mat_MPIBAIJ *b; 2933 PetscErrorCode ierr; 2934 PetscTruth flg; 2935 2936 PetscFunctionBegin; 2937 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2938 B->data = (void*)b; 2939 2940 2941 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2942 B->mapping = 0; 2943 B->assembled = PETSC_FALSE; 2944 2945 B->insertmode = NOT_SET_VALUES; 2946 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2947 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2948 2949 /* build local table of row and column ownerships */ 2950 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2951 2952 /* build cache for off array entries formed */ 2953 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2954 b->donotstash = PETSC_FALSE; 2955 b->colmap = PETSC_NULL; 2956 b->garray = PETSC_NULL; 2957 b->roworiented = PETSC_TRUE; 2958 2959 /* stuff used in block assembly */ 2960 b->barray = 0; 2961 2962 /* stuff used for matrix vector multiply */ 2963 b->lvec = 0; 2964 b->Mvctx = 0; 2965 2966 /* stuff for MatGetRow() */ 2967 b->rowindices = 0; 2968 b->rowvalues = 0; 2969 b->getrowactive = PETSC_FALSE; 2970 2971 /* hash table stuff */ 2972 b->ht = 0; 2973 b->hd = 0; 2974 b->ht_size = 0; 2975 b->ht_flag = PETSC_FALSE; 2976 b->ht_fact = 0; 2977 b->ht_total_ct = 0; 2978 b->ht_insert_ct = 0; 2979 2980 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2981 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2982 if (flg) { 2983 PetscReal fact = 1.39; 2984 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2985 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2986 if (fact <= 1.0) fact = 1.39; 2987 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2988 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2989 } 2990 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2991 2992 #if defined(PETSC_HAVE_MUMPS) 2993 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_mpibaij_mumps",MatGetFactor_mpibaij_mumps);CHKERRQ(ierr); 2994 #endif 2995 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 2996 "MatConvert_MPIBAIJ_MPIAdj", 2997 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2998 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2999 "MatStoreValues_MPIBAIJ", 3000 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3001 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3002 "MatRetrieveValues_MPIBAIJ", 3003 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3004 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3005 "MatGetDiagonalBlock_MPIBAIJ", 3006 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3007 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3008 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3009 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3010 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3011 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3012 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3013 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3014 "MatDiagonalScaleLocal_MPIBAIJ", 3015 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3016 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3017 "MatSetHashTableFactor_MPIBAIJ", 3018 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3019 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3020 PetscFunctionReturn(0); 3021 } 3022 EXTERN_C_END 3023 3024 /*MC 3025 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3026 3027 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3028 and MATMPIBAIJ otherwise. 3029 3030 Options Database Keys: 3031 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3032 3033 Level: beginner 3034 3035 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3036 M*/ 3037 3038 EXTERN_C_BEGIN 3039 #undef __FUNCT__ 3040 #define __FUNCT__ "MatCreate_BAIJ" 3041 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3042 { 3043 PetscErrorCode ierr; 3044 PetscMPIInt size; 3045 3046 PetscFunctionBegin; 3047 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3048 if (size == 1) { 3049 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3050 } else { 3051 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3052 } 3053 PetscFunctionReturn(0); 3054 } 3055 EXTERN_C_END 3056 3057 #undef __FUNCT__ 3058 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3059 /*@C 3060 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3061 (block compressed row). For good matrix assembly performance 3062 the user should preallocate the matrix storage by setting the parameters 3063 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3064 performance can be increased by more than a factor of 50. 3065 3066 Collective on Mat 3067 3068 Input Parameters: 3069 + A - the matrix 3070 . bs - size of blockk 3071 . d_nz - number of block nonzeros per block row in diagonal portion of local 3072 submatrix (same for all local rows) 3073 . d_nnz - array containing the number of block nonzeros in the various block rows 3074 of the in diagonal portion of the local (possibly different for each block 3075 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3076 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3077 submatrix (same for all local rows). 3078 - o_nnz - array containing the number of nonzeros in the various block rows of the 3079 off-diagonal portion of the local submatrix (possibly different for 3080 each block row) or PETSC_NULL. 3081 3082 If the *_nnz parameter is given then the *_nz parameter is ignored 3083 3084 Options Database Keys: 3085 + -mat_block_size - size of the blocks to use 3086 - -mat_use_hash_table <fact> 3087 3088 Notes: 3089 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3090 than it must be used on all processors that share the object for that argument. 3091 3092 Storage Information: 3093 For a square global matrix we define each processor's diagonal portion 3094 to be its local rows and the corresponding columns (a square submatrix); 3095 each processor's off-diagonal portion encompasses the remainder of the 3096 local matrix (a rectangular submatrix). 3097 3098 The user can specify preallocated storage for the diagonal part of 3099 the local submatrix with either d_nz or d_nnz (not both). Set 3100 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3101 memory allocation. Likewise, specify preallocated storage for the 3102 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3103 3104 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3105 the figure below we depict these three local rows and all columns (0-11). 3106 3107 .vb 3108 0 1 2 3 4 5 6 7 8 9 10 11 3109 ------------------- 3110 row 3 | o o o d d d o o o o o o 3111 row 4 | o o o d d d o o o o o o 3112 row 5 | o o o d d d o o o o o o 3113 ------------------- 3114 .ve 3115 3116 Thus, any entries in the d locations are stored in the d (diagonal) 3117 submatrix, and any entries in the o locations are stored in the 3118 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3119 stored simply in the MATSEQBAIJ format for compressed row storage. 3120 3121 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3122 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3123 In general, for PDE problems in which most nonzeros are near the diagonal, 3124 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3125 or you will get TERRIBLE performance; see the users' manual chapter on 3126 matrices. 3127 3128 You can call MatGetInfo() to get information on how effective the preallocation was; 3129 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3130 You can also run with the option -info and look for messages with the string 3131 malloc in them to see if additional memory allocation was needed. 3132 3133 Level: intermediate 3134 3135 .keywords: matrix, block, aij, compressed row, sparse, parallel 3136 3137 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3138 @*/ 3139 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3140 { 3141 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3142 3143 PetscFunctionBegin; 3144 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3145 if (f) { 3146 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3147 } 3148 PetscFunctionReturn(0); 3149 } 3150 3151 #undef __FUNCT__ 3152 #define __FUNCT__ "MatCreateMPIBAIJ" 3153 /*@C 3154 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3155 (block compressed row). For good matrix assembly performance 3156 the user should preallocate the matrix storage by setting the parameters 3157 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3158 performance can be increased by more than a factor of 50. 3159 3160 Collective on MPI_Comm 3161 3162 Input Parameters: 3163 + comm - MPI communicator 3164 . bs - size of blockk 3165 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3166 This value should be the same as the local size used in creating the 3167 y vector for the matrix-vector product y = Ax. 3168 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3169 This value should be the same as the local size used in creating the 3170 x vector for the matrix-vector product y = Ax. 3171 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3172 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3173 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3174 submatrix (same for all local rows) 3175 . d_nnz - array containing the number of nonzero blocks in the various block rows 3176 of the in diagonal portion of the local (possibly different for each block 3177 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3178 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3179 submatrix (same for all local rows). 3180 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3181 off-diagonal portion of the local submatrix (possibly different for 3182 each block row) or PETSC_NULL. 3183 3184 Output Parameter: 3185 . A - the matrix 3186 3187 Options Database Keys: 3188 + -mat_block_size - size of the blocks to use 3189 - -mat_use_hash_table <fact> 3190 3191 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3192 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3193 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3194 3195 Notes: 3196 If the *_nnz parameter is given then the *_nz parameter is ignored 3197 3198 A nonzero block is any block that as 1 or more nonzeros in it 3199 3200 The user MUST specify either the local or global matrix dimensions 3201 (possibly both). 3202 3203 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3204 than it must be used on all processors that share the object for that argument. 3205 3206 Storage Information: 3207 For a square global matrix we define each processor's diagonal portion 3208 to be its local rows and the corresponding columns (a square submatrix); 3209 each processor's off-diagonal portion encompasses the remainder of the 3210 local matrix (a rectangular submatrix). 3211 3212 The user can specify preallocated storage for the diagonal part of 3213 the local submatrix with either d_nz or d_nnz (not both). Set 3214 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3215 memory allocation. Likewise, specify preallocated storage for the 3216 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3217 3218 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3219 the figure below we depict these three local rows and all columns (0-11). 3220 3221 .vb 3222 0 1 2 3 4 5 6 7 8 9 10 11 3223 ------------------- 3224 row 3 | o o o d d d o o o o o o 3225 row 4 | o o o d d d o o o o o o 3226 row 5 | o o o d d d o o o o o o 3227 ------------------- 3228 .ve 3229 3230 Thus, any entries in the d locations are stored in the d (diagonal) 3231 submatrix, and any entries in the o locations are stored in the 3232 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3233 stored simply in the MATSEQBAIJ format for compressed row storage. 3234 3235 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3236 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3237 In general, for PDE problems in which most nonzeros are near the diagonal, 3238 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3239 or you will get TERRIBLE performance; see the users' manual chapter on 3240 matrices. 3241 3242 Level: intermediate 3243 3244 .keywords: matrix, block, aij, compressed row, sparse, parallel 3245 3246 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3247 @*/ 3248 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3249 { 3250 PetscErrorCode ierr; 3251 PetscMPIInt size; 3252 3253 PetscFunctionBegin; 3254 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3255 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3256 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3257 if (size > 1) { 3258 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3259 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3260 } else { 3261 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3262 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3263 } 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3269 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3270 { 3271 Mat mat; 3272 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3273 PetscErrorCode ierr; 3274 PetscInt len=0; 3275 3276 PetscFunctionBegin; 3277 *newmat = 0; 3278 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3279 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3280 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3281 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3282 3283 mat->factortype = matin->factortype; 3284 mat->preallocated = PETSC_TRUE; 3285 mat->assembled = PETSC_TRUE; 3286 mat->insertmode = NOT_SET_VALUES; 3287 3288 a = (Mat_MPIBAIJ*)mat->data; 3289 mat->rmap->bs = matin->rmap->bs; 3290 a->bs2 = oldmat->bs2; 3291 a->mbs = oldmat->mbs; 3292 a->nbs = oldmat->nbs; 3293 a->Mbs = oldmat->Mbs; 3294 a->Nbs = oldmat->Nbs; 3295 3296 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3297 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3298 3299 a->size = oldmat->size; 3300 a->rank = oldmat->rank; 3301 a->donotstash = oldmat->donotstash; 3302 a->roworiented = oldmat->roworiented; 3303 a->rowindices = 0; 3304 a->rowvalues = 0; 3305 a->getrowactive = PETSC_FALSE; 3306 a->barray = 0; 3307 a->rstartbs = oldmat->rstartbs; 3308 a->rendbs = oldmat->rendbs; 3309 a->cstartbs = oldmat->cstartbs; 3310 a->cendbs = oldmat->cendbs; 3311 3312 /* hash table stuff */ 3313 a->ht = 0; 3314 a->hd = 0; 3315 a->ht_size = 0; 3316 a->ht_flag = oldmat->ht_flag; 3317 a->ht_fact = oldmat->ht_fact; 3318 a->ht_total_ct = 0; 3319 a->ht_insert_ct = 0; 3320 3321 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3322 if (oldmat->colmap) { 3323 #if defined (PETSC_USE_CTABLE) 3324 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3325 #else 3326 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3327 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3328 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3329 #endif 3330 } else a->colmap = 0; 3331 3332 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3333 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3334 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3335 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3336 } else a->garray = 0; 3337 3338 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3339 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3340 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3341 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3342 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3343 3344 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3345 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3346 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3347 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3348 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3349 *newmat = mat; 3350 3351 PetscFunctionReturn(0); 3352 } 3353 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatLoad_MPIBAIJ" 3356 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 3357 { 3358 Mat A; 3359 PetscErrorCode ierr; 3360 int fd; 3361 PetscInt i,nz,j,rstart,rend; 3362 PetscScalar *vals,*buf; 3363 MPI_Comm comm = ((PetscObject)viewer)->comm; 3364 MPI_Status status; 3365 PetscMPIInt rank,size,maxnz; 3366 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3367 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3368 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3369 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3370 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3371 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3372 3373 PetscFunctionBegin; 3374 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3375 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3376 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3377 3378 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3379 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3380 if (!rank) { 3381 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3382 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3383 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3384 } 3385 3386 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3387 M = header[1]; N = header[2]; 3388 3389 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3390 3391 /* 3392 This code adds extra rows to make sure the number of rows is 3393 divisible by the blocksize 3394 */ 3395 Mbs = M/bs; 3396 extra_rows = bs - M + bs*Mbs; 3397 if (extra_rows == bs) extra_rows = 0; 3398 else Mbs++; 3399 if (extra_rows && !rank) { 3400 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3401 } 3402 3403 /* determine ownership of all rows */ 3404 mbs = Mbs/size + ((Mbs % size) > rank); 3405 m = mbs*bs; 3406 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3407 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3408 3409 /* process 0 needs enough room for process with most rows */ 3410 if (!rank) { 3411 mmax = rowners[1]; 3412 for (i=2; i<size; i++) { 3413 mmax = PetscMax(mmax,rowners[i]); 3414 } 3415 mmax*=bs; 3416 } else mmax = m; 3417 3418 rowners[0] = 0; 3419 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3420 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3421 rstart = rowners[rank]; 3422 rend = rowners[rank+1]; 3423 3424 /* distribute row lengths to all processors */ 3425 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3426 if (!rank) { 3427 mend = m; 3428 if (size == 1) mend = mend - extra_rows; 3429 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3430 for (j=mend; j<m; j++) locrowlens[j] = 1; 3431 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3432 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3433 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3434 for (j=0; j<m; j++) { 3435 procsnz[0] += locrowlens[j]; 3436 } 3437 for (i=1; i<size; i++) { 3438 mend = browners[i+1] - browners[i]; 3439 if (i == size-1) mend = mend - extra_rows; 3440 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3441 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3442 /* calculate the number of nonzeros on each processor */ 3443 for (j=0; j<browners[i+1]-browners[i]; j++) { 3444 procsnz[i] += rowlengths[j]; 3445 } 3446 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3447 } 3448 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3449 } else { 3450 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3451 } 3452 3453 if (!rank) { 3454 /* determine max buffer needed and allocate it */ 3455 maxnz = procsnz[0]; 3456 for (i=1; i<size; i++) { 3457 maxnz = PetscMax(maxnz,procsnz[i]); 3458 } 3459 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3460 3461 /* read in my part of the matrix column indices */ 3462 nz = procsnz[0]; 3463 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3464 mycols = ibuf; 3465 if (size == 1) nz -= extra_rows; 3466 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3467 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3468 3469 /* read in every ones (except the last) and ship off */ 3470 for (i=1; i<size-1; i++) { 3471 nz = procsnz[i]; 3472 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3473 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3474 } 3475 /* read in the stuff for the last proc */ 3476 if (size != 1) { 3477 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3478 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3479 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3480 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3481 } 3482 ierr = PetscFree(cols);CHKERRQ(ierr); 3483 } else { 3484 /* determine buffer space needed for message */ 3485 nz = 0; 3486 for (i=0; i<m; i++) { 3487 nz += locrowlens[i]; 3488 } 3489 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3490 mycols = ibuf; 3491 /* receive message of column indices*/ 3492 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3493 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3494 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3495 } 3496 3497 /* loop over local rows, determining number of off diagonal entries */ 3498 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3499 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3500 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3501 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3502 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3503 rowcount = 0; nzcount = 0; 3504 for (i=0; i<mbs; i++) { 3505 dcount = 0; 3506 odcount = 0; 3507 for (j=0; j<bs; j++) { 3508 kmax = locrowlens[rowcount]; 3509 for (k=0; k<kmax; k++) { 3510 tmp = mycols[nzcount++]/bs; 3511 if (!mask[tmp]) { 3512 mask[tmp] = 1; 3513 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3514 else masked1[dcount++] = tmp; 3515 } 3516 } 3517 rowcount++; 3518 } 3519 3520 dlens[i] = dcount; 3521 odlens[i] = odcount; 3522 3523 /* zero out the mask elements we set */ 3524 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3525 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3526 } 3527 3528 /* create our matrix */ 3529 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3530 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3531 ierr = MatSetType(A,type);CHKERRQ(ierr) 3532 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3533 3534 if (!rank) { 3535 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3536 /* read in my part of the matrix numerical values */ 3537 nz = procsnz[0]; 3538 vals = buf; 3539 mycols = ibuf; 3540 if (size == 1) nz -= extra_rows; 3541 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3542 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3543 3544 /* insert into matrix */ 3545 jj = rstart*bs; 3546 for (i=0; i<m; i++) { 3547 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3548 mycols += locrowlens[i]; 3549 vals += locrowlens[i]; 3550 jj++; 3551 } 3552 /* read in other processors (except the last one) and ship out */ 3553 for (i=1; i<size-1; i++) { 3554 nz = procsnz[i]; 3555 vals = buf; 3556 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3557 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3558 } 3559 /* the last proc */ 3560 if (size != 1){ 3561 nz = procsnz[i] - extra_rows; 3562 vals = buf; 3563 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3564 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3565 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3566 } 3567 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3568 } else { 3569 /* receive numeric values */ 3570 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3571 3572 /* receive message of values*/ 3573 vals = buf; 3574 mycols = ibuf; 3575 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3576 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3577 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3578 3579 /* insert into matrix */ 3580 jj = rstart*bs; 3581 for (i=0; i<m; i++) { 3582 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3583 mycols += locrowlens[i]; 3584 vals += locrowlens[i]; 3585 jj++; 3586 } 3587 } 3588 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3589 ierr = PetscFree(buf);CHKERRQ(ierr); 3590 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3591 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3592 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3593 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3594 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3595 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3596 3597 *newmat = A; 3598 PetscFunctionReturn(0); 3599 } 3600 3601 #undef __FUNCT__ 3602 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3603 /*@ 3604 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3605 3606 Input Parameters: 3607 . mat - the matrix 3608 . fact - factor 3609 3610 Collective on Mat 3611 3612 Level: advanced 3613 3614 Notes: 3615 This can also be set by the command line option: -mat_use_hash_table <fact> 3616 3617 .keywords: matrix, hashtable, factor, HT 3618 3619 .seealso: MatSetOption() 3620 @*/ 3621 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3622 { 3623 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3624 3625 PetscFunctionBegin; 3626 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3627 if (f) { 3628 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3629 } 3630 PetscFunctionReturn(0); 3631 } 3632 3633 EXTERN_C_BEGIN 3634 #undef __FUNCT__ 3635 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3636 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3637 { 3638 Mat_MPIBAIJ *baij; 3639 3640 PetscFunctionBegin; 3641 baij = (Mat_MPIBAIJ*)mat->data; 3642 baij->ht_fact = fact; 3643 PetscFunctionReturn(0); 3644 } 3645 EXTERN_C_END 3646 3647 #undef __FUNCT__ 3648 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3649 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3650 { 3651 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3652 PetscFunctionBegin; 3653 *Ad = a->A; 3654 *Ao = a->B; 3655 *colmap = a->garray; 3656 PetscFunctionReturn(0); 3657 } 3658 3659 /* 3660 Special version for direct calls from Fortran (to eliminate two function call overheads 3661 */ 3662 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3663 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3664 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3665 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3666 #endif 3667 3668 #undef __FUNCT__ 3669 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3670 /*@C 3671 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3672 3673 Collective on Mat 3674 3675 Input Parameters: 3676 + mat - the matrix 3677 . min - number of input rows 3678 . im - input rows 3679 . nin - number of input columns 3680 . in - input columns 3681 . v - numerical values input 3682 - addvin - INSERT_VALUES or ADD_VALUES 3683 3684 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3685 3686 Level: advanced 3687 3688 .seealso: MatSetValuesBlocked() 3689 @*/ 3690 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3691 { 3692 /* convert input arguments to C version */ 3693 Mat mat = *matin; 3694 PetscInt m = *min, n = *nin; 3695 InsertMode addv = *addvin; 3696 3697 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3698 const MatScalar *value; 3699 MatScalar *barray=baij->barray; 3700 PetscTruth roworiented = baij->roworiented; 3701 PetscErrorCode ierr; 3702 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3703 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3704 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3705 3706 PetscFunctionBegin; 3707 /* tasks normally handled by MatSetValuesBlocked() */ 3708 if (mat->insertmode == NOT_SET_VALUES) { 3709 mat->insertmode = addv; 3710 } 3711 #if defined(PETSC_USE_DEBUG) 3712 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3713 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3714 #endif 3715 if (mat->assembled) { 3716 mat->was_assembled = PETSC_TRUE; 3717 mat->assembled = PETSC_FALSE; 3718 } 3719 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3720 3721 3722 if(!barray) { 3723 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3724 baij->barray = barray; 3725 } 3726 3727 if (roworiented) { 3728 stepval = (n-1)*bs; 3729 } else { 3730 stepval = (m-1)*bs; 3731 } 3732 for (i=0; i<m; i++) { 3733 if (im[i] < 0) continue; 3734 #if defined(PETSC_USE_DEBUG) 3735 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3736 #endif 3737 if (im[i] >= rstart && im[i] < rend) { 3738 row = im[i] - rstart; 3739 for (j=0; j<n; j++) { 3740 /* If NumCol = 1 then a copy is not required */ 3741 if ((roworiented) && (n == 1)) { 3742 barray = (MatScalar*)v + i*bs2; 3743 } else if((!roworiented) && (m == 1)) { 3744 barray = (MatScalar*)v + j*bs2; 3745 } else { /* Here a copy is required */ 3746 if (roworiented) { 3747 value = v + i*(stepval+bs)*bs + j*bs; 3748 } else { 3749 value = v + j*(stepval+bs)*bs + i*bs; 3750 } 3751 for (ii=0; ii<bs; ii++,value+=stepval) { 3752 for (jj=0; jj<bs; jj++) { 3753 *barray++ = *value++; 3754 } 3755 } 3756 barray -=bs2; 3757 } 3758 3759 if (in[j] >= cstart && in[j] < cend){ 3760 col = in[j] - cstart; 3761 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3762 } 3763 else if (in[j] < 0) continue; 3764 #if defined(PETSC_USE_DEBUG) 3765 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 3766 #endif 3767 else { 3768 if (mat->was_assembled) { 3769 if (!baij->colmap) { 3770 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3771 } 3772 3773 #if defined(PETSC_USE_DEBUG) 3774 #if defined (PETSC_USE_CTABLE) 3775 { PetscInt data; 3776 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3777 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3778 } 3779 #else 3780 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3781 #endif 3782 #endif 3783 #if defined (PETSC_USE_CTABLE) 3784 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3785 col = (col - 1)/bs; 3786 #else 3787 col = (baij->colmap[in[j]] - 1)/bs; 3788 #endif 3789 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3790 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3791 col = in[j]; 3792 } 3793 } 3794 else col = in[j]; 3795 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3796 } 3797 } 3798 } else { 3799 if (!baij->donotstash) { 3800 if (roworiented) { 3801 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3802 } else { 3803 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3804 } 3805 } 3806 } 3807 } 3808 3809 /* task normally handled by MatSetValuesBlocked() */ 3810 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3811 PetscFunctionReturn(0); 3812 } 3813