1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 111 { \ 112 \ 113 brow = row/bs; \ 114 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 115 rmax = aimax[brow]; nrow = ailen[brow]; \ 116 bcol = col/bs; \ 117 ridx = row % bs; cidx = col % bs; \ 118 low = 0; high = nrow; \ 119 while (high-low > 3) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > bcol) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > bcol) break; \ 126 if (rp[_i] == bcol) { \ 127 bap = ap + bs2*_i + bs*cidx + ridx; \ 128 if (addv == ADD_VALUES) *bap += value; \ 129 else *bap = value; \ 130 goto a_noinsert; \ 131 } \ 132 } \ 133 if (a->nonew == 1) goto a_noinsert; \ 134 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 136 N = nrow++ - 1; \ 137 /* shift up all the later entries in this row */ \ 138 for (ii=N; ii>=_i; ii--) { \ 139 rp[ii+1] = rp[ii]; \ 140 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141 } \ 142 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 143 rp[_i] = bcol; \ 144 ap[bs2*_i + bs*cidx + ridx] = value; \ 145 a_noinsert:; \ 146 ailen[brow] = nrow; \ 147 } 148 149 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 150 { \ 151 brow = row/bs; \ 152 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 153 rmax = bimax[brow]; nrow = bilen[brow]; \ 154 bcol = col/bs; \ 155 ridx = row % bs; cidx = col % bs; \ 156 low = 0; high = nrow; \ 157 while (high-low > 3) { \ 158 t = (low+high)/2; \ 159 if (rp[t] > bcol) high = t; \ 160 else low = t; \ 161 } \ 162 for (_i=low; _i<high; _i++) { \ 163 if (rp[_i] > bcol) break; \ 164 if (rp[_i] == bcol) { \ 165 bap = ap + bs2*_i + bs*cidx + ridx; \ 166 if (addv == ADD_VALUES) *bap += value; \ 167 else *bap = value; \ 168 goto b_noinsert; \ 169 } \ 170 } \ 171 if (b->nonew == 1) goto b_noinsert; \ 172 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 173 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 174 CHKMEMQ;\ 175 N = nrow++ - 1; \ 176 /* shift up all the later entries in this row */ \ 177 for (ii=N; ii>=_i; ii--) { \ 178 rp[ii+1] = rp[ii]; \ 179 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 180 } \ 181 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 182 rp[_i] = bcol; \ 183 ap[bs2*_i + bs*cidx + ridx] = value; \ 184 b_noinsert:; \ 185 bilen[brow] = nrow; \ 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatSetValues_MPIBAIJ" 190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 191 { 192 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 193 MatScalar value; 194 PetscTruth roworiented = baij->roworiented; 195 PetscErrorCode ierr; 196 PetscInt i,j,row,col; 197 PetscInt rstart_orig=mat->rmap->rstart; 198 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 199 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 200 201 /* Some Variables required in the macro */ 202 Mat A = baij->A; 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 204 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 205 MatScalar *aa=a->a; 206 207 Mat B = baij->B; 208 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 209 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 210 MatScalar *ba=b->a; 211 212 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 213 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 214 MatScalar *ap,*bap; 215 216 PetscFunctionBegin; 217 if (v) PetscValidScalarPointer(v,6); 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_DEBUG) 221 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 222 #endif 223 if (im[i] >= rstart_orig && im[i] < rend_orig) { 224 row = im[i] - rstart_orig; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart_orig && in[j] < cend_orig){ 227 col = in[j] - cstart_orig; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } else if (in[j] < 0) continue; 232 #if defined(PETSC_USE_DEBUG) 233 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1); 234 #endif 235 else { 236 if (mat->was_assembled) { 237 if (!baij->colmap) { 238 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 239 } 240 #if defined (PETSC_USE_CTABLE) 241 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 242 col = col - 1; 243 #else 244 col = baij->colmap[in[j]/bs] - 1; 245 #endif 246 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 247 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 248 col = in[j]; 249 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 250 B = baij->B; 251 b = (Mat_SeqBAIJ*)(B)->data; 252 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 253 ba=b->a; 254 } else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (!baij->donotstash) { 263 if (roworiented) { 264 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 265 } else { 266 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 267 } 268 } 269 } 270 } 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 277 { 278 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 279 const PetscScalar *value; 280 MatScalar *barray=baij->barray; 281 PetscTruth roworiented = baij->roworiented; 282 PetscErrorCode ierr; 283 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 284 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 285 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 286 287 PetscFunctionBegin; 288 if(!barray) { 289 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 290 baij->barray = barray; 291 } 292 293 if (roworiented) { 294 stepval = (n-1)*bs; 295 } else { 296 stepval = (m-1)*bs; 297 } 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + i*(stepval+bs)*bs + j*bs; 314 } else { 315 value = v + j*(stepval+bs)*bs + i*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=stepval) { 318 for (jj=0; jj<bs; jj++) { 319 *barray++ = *value++; 320 } 321 } 322 barray -=bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend){ 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } 329 else if (in[j] < 0) continue; 330 #if defined(PETSC_USE_DEBUG) 331 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 332 #endif 333 else { 334 if (mat->was_assembled) { 335 if (!baij->colmap) { 336 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 337 } 338 339 #if defined(PETSC_USE_DEBUG) 340 #if defined (PETSC_USE_CTABLE) 341 { PetscInt data; 342 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 343 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 344 } 345 #else 346 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 347 #endif 348 #endif 349 #if defined (PETSC_USE_CTABLE) 350 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 351 col = (col - 1)/bs; 352 #else 353 col = (baij->colmap[in[j]] - 1)/bs; 354 #endif 355 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 356 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 357 col = in[j]; 358 } 359 } 360 else col = in[j]; 361 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 362 } 363 } 364 } else { 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 384 { 385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 386 PetscTruth roworiented = baij->roworiented; 387 PetscErrorCode ierr; 388 PetscInt i,j,row,col; 389 PetscInt rstart_orig=mat->rmap->rstart; 390 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 391 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 392 PetscReal tmp; 393 MatScalar **HD = baij->hd,value; 394 #if defined(PETSC_USE_DEBUG) 395 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 396 #endif 397 398 PetscFunctionBegin; 399 if (v) PetscValidScalarPointer(v,6); 400 for (i=0; i<m; i++) { 401 #if defined(PETSC_USE_DEBUG) 402 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 403 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscTruth roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 584 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 662 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 663 v = amat->a; jj = amat->j; 664 for (i=0; i<amat->nz; i++) { 665 for (j=0; j<bs; j++){ 666 col = bs*(cstart + *jj) + j; /* column index */ 667 for (row=0; row<bs; row++){ 668 tmp[col] += PetscAbsScalar(*v); v++; 669 } 670 } 671 jj++; 672 } 673 v = bmat->a; jj = bmat->j; 674 for (i=0; i<bmat->nz; i++) { 675 for (j=0; j<bs; j++){ 676 col = bs*garray[*jj] + j; 677 for (row=0; row<bs; row++){ 678 tmp[col] += PetscAbsScalar(*v); v++; 679 } 680 } 681 jj++; 682 } 683 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 684 *nrm = 0.0; 685 for (j=0; j<mat->cmap->N; j++) { 686 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 687 } 688 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY) { /* max row sum */ 690 PetscReal *sums; 691 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 692 sum = 0.0; 693 for (j=0; j<amat->mbs; j++) { 694 for (row=0; row<bs; row++) sums[row] = 0.0; 695 v = amat->a + bs2*amat->i[j]; 696 nz = amat->i[j+1]-amat->i[j]; 697 for (i=0; i<nz; i++) { 698 for (col=0; col<bs; col++){ 699 for (row=0; row<bs; row++){ 700 sums[row] += PetscAbsScalar(*v); v++; 701 } 702 } 703 } 704 v = bmat->a + bs2*bmat->i[j]; 705 nz = bmat->i[j+1]-bmat->i[j]; 706 for (i=0; i<nz; i++) { 707 for (col=0; col<bs; col++){ 708 for (row=0; row<bs; row++){ 709 sums[row] += PetscAbsScalar(*v); v++; 710 } 711 } 712 } 713 for (row=0; row<bs; row++){ 714 if (sums[row] > sum) sum = sums[row]; 715 } 716 } 717 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 718 ierr = PetscFree(sums);CHKERRQ(ierr); 719 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 /* 725 Creates the hash table, and sets the table 726 This table is created only once. 727 If new entried need to be added to the matrix 728 then the hash table has to be destroyed and 729 recreated. 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 734 { 735 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 736 Mat A = baij->A,B=baij->B; 737 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 738 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 739 PetscErrorCode ierr; 740 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 741 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 742 PetscInt *HT,key; 743 MatScalar **HD; 744 PetscReal tmp; 745 #if defined(PETSC_USE_INFO) 746 PetscInt ct=0,max=0; 747 #endif 748 749 PetscFunctionBegin; 750 if (baij->ht) PetscFunctionReturn(0); 751 752 baij->ht_size = (PetscInt)(factor*nz); 753 ht_size = baij->ht_size; 754 755 /* Allocate Memory for Hash Table */ 756 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 757 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 758 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 759 HD = baij->hd; 760 HT = baij->ht; 761 762 /* Loop Over A */ 763 for (i=0; i<a->mbs; i++) { 764 for (j=ai[i]; j<ai[i+1]; j++) { 765 row = i+rstart; 766 col = aj[j]+cstart; 767 768 key = row*Nbs + col + 1; 769 h1 = HASH(ht_size,key,tmp); 770 for (k=0; k<ht_size; k++){ 771 if (!HT[(h1+k)%ht_size]) { 772 HT[(h1+k)%ht_size] = key; 773 HD[(h1+k)%ht_size] = a->a + j*bs2; 774 break; 775 #if defined(PETSC_USE_INFO) 776 } else { 777 ct++; 778 #endif 779 } 780 } 781 #if defined(PETSC_USE_INFO) 782 if (k> max) max = k; 783 #endif 784 } 785 } 786 /* Loop Over B */ 787 for (i=0; i<b->mbs; i++) { 788 for (j=bi[i]; j<bi[i+1]; j++) { 789 row = i+rstart; 790 col = garray[bj[j]]; 791 key = row*Nbs + col + 1; 792 h1 = HASH(ht_size,key,tmp); 793 for (k=0; k<ht_size; k++){ 794 if (!HT[(h1+k)%ht_size]) { 795 HT[(h1+k)%ht_size] = key; 796 HD[(h1+k)%ht_size] = b->a + j*bs2; 797 break; 798 #if defined(PETSC_USE_INFO) 799 } else { 800 ct++; 801 #endif 802 } 803 } 804 #if defined(PETSC_USE_INFO) 805 if (k> max) max = k; 806 #endif 807 } 808 } 809 810 /* Print Summary */ 811 #if defined(PETSC_USE_INFO) 812 for (i=0,j=0; i<ht_size; i++) { 813 if (HT[i]) {j++;} 814 } 815 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 816 #endif 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 823 { 824 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 825 PetscErrorCode ierr; 826 PetscInt nstash,reallocs; 827 InsertMode addv; 828 829 PetscFunctionBegin; 830 if (baij->donotstash) { 831 PetscFunctionReturn(0); 832 } 833 834 /* make sure all processors are either in INSERTMODE or ADDMODE */ 835 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 836 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 837 mat->insertmode = addv; /* in case this processor had no cache */ 838 839 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 840 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 841 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 842 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 851 { 852 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 853 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 854 PetscErrorCode ierr; 855 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 856 PetscInt *row,*col; 857 PetscTruth r1,r2,r3,other_disassembled; 858 MatScalar *val; 859 InsertMode addv = mat->insertmode; 860 PetscMPIInt n; 861 862 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 863 PetscFunctionBegin; 864 if (!baij->donotstash) { 865 while (1) { 866 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 867 if (!flg) break; 868 869 for (i=0; i<n;) { 870 /* Now identify the consecutive vals belonging to the same row */ 871 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 872 if (j < n) ncols = j-i; 873 else ncols = n-i; 874 /* Now assemble all these values with a single function call */ 875 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 876 i = j; 877 } 878 } 879 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 880 /* Now process the block-stash. Since the values are stashed column-oriented, 881 set the roworiented flag to column oriented, and after MatSetValues() 882 restore the original flags */ 883 r1 = baij->roworiented; 884 r2 = a->roworiented; 885 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 886 baij->roworiented = PETSC_FALSE; 887 a->roworiented = PETSC_FALSE; 888 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 889 while (1) { 890 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 891 if (!flg) break; 892 893 for (i=0; i<n;) { 894 /* Now identify the consecutive vals belonging to the same row */ 895 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 896 if (j < n) ncols = j-i; 897 else ncols = n-i; 898 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 899 i = j; 900 } 901 } 902 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 903 baij->roworiented = r1; 904 a->roworiented = r2; 905 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 906 } 907 908 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 909 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 910 911 /* determine if any processor has disassembled, if so we must 912 also disassemble ourselfs, in order that we may reassemble. */ 913 /* 914 if nonzero structure of submatrix B cannot change then we know that 915 no processor disassembled thus we can skip this stuff 916 */ 917 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 918 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 919 if (mat->was_assembled && !other_disassembled) { 920 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 921 } 922 } 923 924 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 925 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 926 } 927 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 928 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 929 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 930 931 #if defined(PETSC_USE_INFO) 932 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 933 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 934 baij->ht_total_ct = 0; 935 baij->ht_insert_ct = 0; 936 } 937 #endif 938 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 939 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 940 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 941 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 942 } 943 944 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 945 baij->rowvalues = 0; 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 952 { 953 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 954 PetscErrorCode ierr; 955 PetscMPIInt size = baij->size,rank = baij->rank; 956 PetscInt bs = mat->rmap->bs; 957 PetscTruth iascii,isdraw; 958 PetscViewer sviewer; 959 PetscViewerFormat format; 960 961 PetscFunctionBegin; 962 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 963 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 964 if (iascii) { 965 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 966 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 967 MatInfo info; 968 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 969 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 971 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 972 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 973 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 974 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 978 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 979 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } else if (format == PETSC_VIEWER_ASCII_INFO) { 982 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 985 PetscFunctionReturn(0); 986 } 987 } 988 989 if (isdraw) { 990 PetscDraw draw; 991 PetscTruth isnull; 992 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 993 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 994 } 995 996 if (size == 1) { 997 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 998 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 999 } else { 1000 /* assemble the entire matrix onto first processor. */ 1001 Mat A; 1002 Mat_SeqBAIJ *Aloc; 1003 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1004 MatScalar *a; 1005 1006 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1007 /* Perhaps this should be the type of mat? */ 1008 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1009 if (!rank) { 1010 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1011 } else { 1012 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1013 } 1014 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1015 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1016 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1017 1018 /* copy over the A part */ 1019 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1020 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1021 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1022 1023 for (i=0; i<mbs; i++) { 1024 rvals[0] = bs*(baij->rstartbs + i); 1025 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1026 for (j=ai[i]; j<ai[i+1]; j++) { 1027 col = (baij->cstartbs+aj[j])*bs; 1028 for (k=0; k<bs; k++) { 1029 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1030 col++; a += bs; 1031 } 1032 } 1033 } 1034 /* copy over the B part */ 1035 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1036 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1037 for (i=0; i<mbs; i++) { 1038 rvals[0] = bs*(baij->rstartbs + i); 1039 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1040 for (j=ai[i]; j<ai[i+1]; j++) { 1041 col = baij->garray[aj[j]]*bs; 1042 for (k=0; k<bs; k++) { 1043 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1044 col++; a += bs; 1045 } 1046 } 1047 } 1048 ierr = PetscFree(rvals);CHKERRQ(ierr); 1049 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1050 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1051 /* 1052 Everyone has to call to draw the matrix since the graphics waits are 1053 synchronized across all processors that share the PetscDraw object 1054 */ 1055 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1056 if (!rank) { 1057 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1058 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1059 } 1060 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1061 ierr = MatDestroy(A);CHKERRQ(ierr); 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatView_MPIBAIJ" 1068 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1069 { 1070 PetscErrorCode ierr; 1071 PetscTruth iascii,isdraw,issocket,isbinary; 1072 1073 PetscFunctionBegin; 1074 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1075 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1076 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1077 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1078 if (iascii || isdraw || issocket || isbinary) { 1079 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1080 } else { 1081 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1082 } 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1088 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1089 { 1090 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1091 PetscErrorCode ierr; 1092 1093 PetscFunctionBegin; 1094 #if defined(PETSC_USE_LOG) 1095 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1096 #endif 1097 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1098 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1099 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1100 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1101 #if defined (PETSC_USE_CTABLE) 1102 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1103 #else 1104 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1105 #endif 1106 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1107 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1108 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1109 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1110 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1111 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1112 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1113 ierr = PetscFree(baij);CHKERRQ(ierr); 1114 1115 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1116 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1117 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1118 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1119 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1120 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1121 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatMult_MPIBAIJ" 1128 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1129 { 1130 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1131 PetscErrorCode ierr; 1132 PetscInt nt; 1133 1134 PetscFunctionBegin; 1135 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1136 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1137 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1138 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1139 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1148 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1149 { 1150 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1151 PetscErrorCode ierr; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1155 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1156 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1157 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1163 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1164 { 1165 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1166 PetscErrorCode ierr; 1167 PetscTruth merged; 1168 1169 PetscFunctionBegin; 1170 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1171 /* do nondiagonal part */ 1172 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1173 if (!merged) { 1174 /* send it on its way */ 1175 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1176 /* do local part */ 1177 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1178 /* receive remote parts: note this assumes the values are not actually */ 1179 /* inserted in yy until the next line */ 1180 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1181 } else { 1182 /* do local part */ 1183 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1184 /* send it on its way */ 1185 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1186 /* values actually were received in the Begin() but we need to call this nop */ 1187 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1188 } 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1194 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1195 { 1196 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1197 PetscErrorCode ierr; 1198 1199 PetscFunctionBegin; 1200 /* do nondiagonal part */ 1201 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1202 /* send it on its way */ 1203 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1204 /* do local part */ 1205 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1206 /* receive remote parts: note this assumes the values are not actually */ 1207 /* inserted in yy until the next line, which is true for my implementation*/ 1208 /* but is not perhaps always true. */ 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1219 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1220 { 1221 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1222 PetscErrorCode ierr; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1226 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatScale_MPIBAIJ" 1232 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1233 { 1234 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1235 PetscErrorCode ierr; 1236 1237 PetscFunctionBegin; 1238 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1239 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1245 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1246 { 1247 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1248 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1249 PetscErrorCode ierr; 1250 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1251 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1252 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1253 1254 PetscFunctionBegin; 1255 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1256 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1257 mat->getrowactive = PETSC_TRUE; 1258 1259 if (!mat->rowvalues && (idx || v)) { 1260 /* 1261 allocate enough space to hold information from the longest row. 1262 */ 1263 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1264 PetscInt max = 1,mbs = mat->mbs,tmp; 1265 for (i=0; i<mbs; i++) { 1266 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1267 if (max < tmp) { max = tmp; } 1268 } 1269 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1270 } 1271 lrow = row - brstart; 1272 1273 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1274 if (!v) {pvA = 0; pvB = 0;} 1275 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1276 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1277 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1278 nztot = nzA + nzB; 1279 1280 cmap = mat->garray; 1281 if (v || idx) { 1282 if (nztot) { 1283 /* Sort by increasing column numbers, assuming A and B already sorted */ 1284 PetscInt imark = -1; 1285 if (v) { 1286 *v = v_p = mat->rowvalues; 1287 for (i=0; i<nzB; i++) { 1288 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1289 else break; 1290 } 1291 imark = i; 1292 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1293 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1294 } 1295 if (idx) { 1296 *idx = idx_p = mat->rowindices; 1297 if (imark > -1) { 1298 for (i=0; i<imark; i++) { 1299 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1300 } 1301 } else { 1302 for (i=0; i<nzB; i++) { 1303 if (cmap[cworkB[i]/bs] < cstart) 1304 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1305 else break; 1306 } 1307 imark = i; 1308 } 1309 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1310 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1311 } 1312 } else { 1313 if (idx) *idx = 0; 1314 if (v) *v = 0; 1315 } 1316 } 1317 *nz = nztot; 1318 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1319 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1325 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1326 { 1327 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1328 1329 PetscFunctionBegin; 1330 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1331 baij->getrowactive = PETSC_FALSE; 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1337 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1338 { 1339 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1344 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1350 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1351 { 1352 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1353 Mat A = a->A,B = a->B; 1354 PetscErrorCode ierr; 1355 PetscReal isend[5],irecv[5]; 1356 1357 PetscFunctionBegin; 1358 info->block_size = (PetscReal)matin->rmap->bs; 1359 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1360 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1361 isend[3] = info->memory; isend[4] = info->mallocs; 1362 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1363 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1364 isend[3] += info->memory; isend[4] += info->mallocs; 1365 if (flag == MAT_LOCAL) { 1366 info->nz_used = isend[0]; 1367 info->nz_allocated = isend[1]; 1368 info->nz_unneeded = isend[2]; 1369 info->memory = isend[3]; 1370 info->mallocs = isend[4]; 1371 } else if (flag == MAT_GLOBAL_MAX) { 1372 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1373 info->nz_used = irecv[0]; 1374 info->nz_allocated = irecv[1]; 1375 info->nz_unneeded = irecv[2]; 1376 info->memory = irecv[3]; 1377 info->mallocs = irecv[4]; 1378 } else if (flag == MAT_GLOBAL_SUM) { 1379 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1380 info->nz_used = irecv[0]; 1381 info->nz_allocated = irecv[1]; 1382 info->nz_unneeded = irecv[2]; 1383 info->memory = irecv[3]; 1384 info->mallocs = irecv[4]; 1385 } else { 1386 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1387 } 1388 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1389 info->fill_ratio_needed = 0; 1390 info->factor_mallocs = 0; 1391 PetscFunctionReturn(0); 1392 } 1393 1394 #undef __FUNCT__ 1395 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1396 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1397 { 1398 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 switch (op) { 1403 case MAT_NEW_NONZERO_LOCATIONS: 1404 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1405 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1406 case MAT_KEEP_NONZERO_PATTERN: 1407 case MAT_NEW_NONZERO_LOCATION_ERR: 1408 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1409 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1410 break; 1411 case MAT_ROW_ORIENTED: 1412 a->roworiented = flg; 1413 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1414 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1415 break; 1416 case MAT_NEW_DIAGONALS: 1417 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1418 break; 1419 case MAT_IGNORE_OFF_PROC_ENTRIES: 1420 a->donotstash = flg; 1421 break; 1422 case MAT_USE_HASH_TABLE: 1423 a->ht_flag = flg; 1424 break; 1425 case MAT_SYMMETRIC: 1426 case MAT_STRUCTURALLY_SYMMETRIC: 1427 case MAT_HERMITIAN: 1428 case MAT_SYMMETRY_ETERNAL: 1429 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1430 break; 1431 default: 1432 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1433 } 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1439 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1440 { 1441 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1442 Mat_SeqBAIJ *Aloc; 1443 Mat B; 1444 PetscErrorCode ierr; 1445 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1446 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1447 MatScalar *a; 1448 1449 PetscFunctionBegin; 1450 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1451 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1452 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1453 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1454 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1455 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1456 } else { 1457 B = *matout; 1458 } 1459 1460 /* copy over the A part */ 1461 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1462 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1463 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1464 1465 for (i=0; i<mbs; i++) { 1466 rvals[0] = bs*(baij->rstartbs + i); 1467 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1468 for (j=ai[i]; j<ai[i+1]; j++) { 1469 col = (baij->cstartbs+aj[j])*bs; 1470 for (k=0; k<bs; k++) { 1471 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1472 col++; a += bs; 1473 } 1474 } 1475 } 1476 /* copy over the B part */ 1477 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1478 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1479 for (i=0; i<mbs; i++) { 1480 rvals[0] = bs*(baij->rstartbs + i); 1481 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1482 for (j=ai[i]; j<ai[i+1]; j++) { 1483 col = baij->garray[aj[j]]*bs; 1484 for (k=0; k<bs; k++) { 1485 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1486 col++; a += bs; 1487 } 1488 } 1489 } 1490 ierr = PetscFree(rvals);CHKERRQ(ierr); 1491 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1492 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1493 1494 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1495 *matout = B; 1496 } else { 1497 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1498 } 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1504 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1505 { 1506 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1507 Mat a = baij->A,b = baij->B; 1508 PetscErrorCode ierr; 1509 PetscInt s1,s2,s3; 1510 1511 PetscFunctionBegin; 1512 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1513 if (rr) { 1514 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1515 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1516 /* Overlap communication with computation. */ 1517 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 } 1519 if (ll) { 1520 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1521 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1522 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1523 } 1524 /* scale the diagonal block */ 1525 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1526 1527 if (rr) { 1528 /* Do a scatter end and then right scale the off-diagonal block */ 1529 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1531 } 1532 1533 PetscFunctionReturn(0); 1534 } 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1538 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1539 { 1540 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1541 PetscErrorCode ierr; 1542 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1543 PetscInt i,*owners = A->rmap->range; 1544 PetscInt *nprocs,j,idx,nsends,row; 1545 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1546 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1547 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1548 MPI_Comm comm = ((PetscObject)A)->comm; 1549 MPI_Request *send_waits,*recv_waits; 1550 MPI_Status recv_status,*send_status; 1551 #if defined(PETSC_DEBUG) 1552 PetscTruth found = PETSC_FALSE; 1553 #endif 1554 1555 PetscFunctionBegin; 1556 /* first count number of contributors to each processor */ 1557 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1558 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1559 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1560 j = 0; 1561 for (i=0; i<N; i++) { 1562 if (lastidx > (idx = rows[i])) j = 0; 1563 lastidx = idx; 1564 for (; j<size; j++) { 1565 if (idx >= owners[j] && idx < owners[j+1]) { 1566 nprocs[2*j]++; 1567 nprocs[2*j+1] = 1; 1568 owner[i] = j; 1569 #if defined(PETSC_DEBUG) 1570 found = PETSC_TRUE; 1571 #endif 1572 break; 1573 } 1574 } 1575 #if defined(PETSC_DEBUG) 1576 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1577 found = PETSC_FALSE; 1578 #endif 1579 } 1580 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1581 1582 /* inform other processors of number of messages and max length*/ 1583 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1584 1585 /* post receives: */ 1586 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1587 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1588 for (i=0; i<nrecvs; i++) { 1589 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1590 } 1591 1592 /* do sends: 1593 1) starts[i] gives the starting index in svalues for stuff going to 1594 the ith processor 1595 */ 1596 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1597 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1598 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1599 starts[0] = 0; 1600 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1601 for (i=0; i<N; i++) { 1602 svalues[starts[owner[i]]++] = rows[i]; 1603 } 1604 1605 starts[0] = 0; 1606 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1607 count = 0; 1608 for (i=0; i<size; i++) { 1609 if (nprocs[2*i+1]) { 1610 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1611 } 1612 } 1613 ierr = PetscFree(starts);CHKERRQ(ierr); 1614 1615 base = owners[rank]; 1616 1617 /* wait on receives */ 1618 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1619 count = nrecvs; 1620 slen = 0; 1621 while (count) { 1622 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1623 /* unpack receives into our local space */ 1624 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1625 source[imdex] = recv_status.MPI_SOURCE; 1626 lens[imdex] = n; 1627 slen += n; 1628 count--; 1629 } 1630 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1631 1632 /* move the data into the send scatter */ 1633 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1634 count = 0; 1635 for (i=0; i<nrecvs; i++) { 1636 values = rvalues + i*nmax; 1637 for (j=0; j<lens[i]; j++) { 1638 lrows[count++] = values[j] - base; 1639 } 1640 } 1641 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1642 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1643 ierr = PetscFree(owner);CHKERRQ(ierr); 1644 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1645 1646 /* actually zap the local rows */ 1647 /* 1648 Zero the required rows. If the "diagonal block" of the matrix 1649 is square and the user wishes to set the diagonal we use separate 1650 code so that MatSetValues() is not called for each diagonal allocating 1651 new memory, thus calling lots of mallocs and slowing things down. 1652 1653 */ 1654 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1655 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1656 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1657 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1658 } else if (diag != 0.0) { 1659 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1660 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1661 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1662 for (i=0; i<slen; i++) { 1663 row = lrows[i] + rstart_bs; 1664 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1665 } 1666 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1667 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1668 } else { 1669 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1670 } 1671 1672 ierr = PetscFree(lrows);CHKERRQ(ierr); 1673 1674 /* wait on sends */ 1675 if (nsends) { 1676 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1677 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1678 ierr = PetscFree(send_status);CHKERRQ(ierr); 1679 } 1680 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1681 ierr = PetscFree(svalues);CHKERRQ(ierr); 1682 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1688 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1689 { 1690 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 1693 PetscFunctionBegin; 1694 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatEqual_MPIBAIJ" 1702 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1703 { 1704 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1705 Mat a,b,c,d; 1706 PetscTruth flg; 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 a = matA->A; b = matA->B; 1711 c = matB->A; d = matB->B; 1712 1713 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1714 if (flg) { 1715 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1716 } 1717 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatCopy_MPIBAIJ" 1723 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1724 { 1725 PetscErrorCode ierr; 1726 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1727 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1728 1729 PetscFunctionBegin; 1730 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1731 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1732 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1733 } else { 1734 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1735 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1736 } 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1742 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1743 { 1744 PetscErrorCode ierr; 1745 1746 PetscFunctionBegin; 1747 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1748 PetscFunctionReturn(0); 1749 } 1750 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1753 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1754 { 1755 PetscErrorCode ierr; 1756 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1757 PetscBLASInt bnz,one=1; 1758 Mat_SeqBAIJ *x,*y; 1759 1760 PetscFunctionBegin; 1761 if (str == SAME_NONZERO_PATTERN) { 1762 PetscScalar alpha = a; 1763 x = (Mat_SeqBAIJ *)xx->A->data; 1764 y = (Mat_SeqBAIJ *)yy->A->data; 1765 bnz = PetscBLASIntCast(x->nz); 1766 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1767 x = (Mat_SeqBAIJ *)xx->B->data; 1768 y = (Mat_SeqBAIJ *)yy->B->data; 1769 bnz = PetscBLASIntCast(x->nz); 1770 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1771 } else { 1772 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1773 } 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1779 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1780 { 1781 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1782 PetscInt rbs,cbs; 1783 PetscErrorCode ierr; 1784 1785 PetscFunctionBegin; 1786 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1787 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1788 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1789 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1790 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1791 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1797 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1798 { 1799 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1800 PetscErrorCode ierr; 1801 1802 PetscFunctionBegin; 1803 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1804 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1810 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1811 { 1812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1813 PetscErrorCode ierr; 1814 1815 PetscFunctionBegin; 1816 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1817 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1823 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1824 { 1825 PetscErrorCode ierr; 1826 IS iscol_local; 1827 PetscInt csize; 1828 1829 PetscFunctionBegin; 1830 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1831 if (call == MAT_REUSE_MATRIX) { 1832 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1833 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1834 } else { 1835 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1836 } 1837 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1838 if (call == MAT_INITIAL_MATRIX) { 1839 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1840 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1841 } 1842 PetscFunctionReturn(0); 1843 } 1844 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1847 /* 1848 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1849 in local and then by concatenating the local matrices the end result. 1850 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1851 */ 1852 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1853 { 1854 PetscErrorCode ierr; 1855 PetscMPIInt rank,size; 1856 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1857 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1858 Mat *local,M,Mreuse; 1859 MatScalar *vwork,*aa; 1860 MPI_Comm comm = ((PetscObject)mat)->comm; 1861 Mat_SeqBAIJ *aij; 1862 1863 1864 PetscFunctionBegin; 1865 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1866 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1867 1868 if (call == MAT_REUSE_MATRIX) { 1869 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1870 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1871 local = &Mreuse; 1872 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1873 } else { 1874 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1875 Mreuse = *local; 1876 ierr = PetscFree(local);CHKERRQ(ierr); 1877 } 1878 1879 /* 1880 m - number of local rows 1881 n - number of columns (same on all processors) 1882 rstart - first row in new global matrix generated 1883 */ 1884 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1885 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1886 m = m/bs; 1887 n = n/bs; 1888 1889 if (call == MAT_INITIAL_MATRIX) { 1890 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1891 ii = aij->i; 1892 jj = aij->j; 1893 1894 /* 1895 Determine the number of non-zeros in the diagonal and off-diagonal 1896 portions of the matrix in order to do correct preallocation 1897 */ 1898 1899 /* first get start and end of "diagonal" columns */ 1900 if (csize == PETSC_DECIDE) { 1901 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1902 if (mglobal == n*bs) { /* square matrix */ 1903 nlocal = m; 1904 } else { 1905 nlocal = n/size + ((n % size) > rank); 1906 } 1907 } else { 1908 nlocal = csize/bs; 1909 } 1910 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1911 rstart = rend - nlocal; 1912 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1913 1914 /* next, compute all the lengths */ 1915 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1916 olens = dlens + m; 1917 for (i=0; i<m; i++) { 1918 jend = ii[i+1] - ii[i]; 1919 olen = 0; 1920 dlen = 0; 1921 for (j=0; j<jend; j++) { 1922 if (*jj < rstart || *jj >= rend) olen++; 1923 else dlen++; 1924 jj++; 1925 } 1926 olens[i] = olen; 1927 dlens[i] = dlen; 1928 } 1929 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1930 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1931 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1932 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1933 ierr = PetscFree(dlens);CHKERRQ(ierr); 1934 } else { 1935 PetscInt ml,nl; 1936 1937 M = *newmat; 1938 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1939 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1940 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1941 /* 1942 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1943 rather than the slower MatSetValues(). 1944 */ 1945 M->was_assembled = PETSC_TRUE; 1946 M->assembled = PETSC_FALSE; 1947 } 1948 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1949 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1950 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1951 ii = aij->i; 1952 jj = aij->j; 1953 aa = aij->a; 1954 for (i=0; i<m; i++) { 1955 row = rstart/bs + i; 1956 nz = ii[i+1] - ii[i]; 1957 cwork = jj; jj += nz; 1958 vwork = aa; aa += nz; 1959 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1960 } 1961 1962 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1963 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 *newmat = M; 1965 1966 /* save submatrix used in processor for next request */ 1967 if (call == MAT_INITIAL_MATRIX) { 1968 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1969 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1970 } 1971 1972 PetscFunctionReturn(0); 1973 } 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatPermute_MPIBAIJ" 1977 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1978 { 1979 MPI_Comm comm,pcomm; 1980 PetscInt first,local_size,nrows; 1981 const PetscInt *rows; 1982 PetscMPIInt size; 1983 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1984 PetscErrorCode ierr; 1985 1986 PetscFunctionBegin; 1987 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1988 /* make a collective version of 'rowp' */ 1989 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1990 if (pcomm==comm) { 1991 crowp = rowp; 1992 } else { 1993 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1994 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1995 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1996 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1997 } 1998 /* collect the global row permutation and invert it */ 1999 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2000 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2001 if (pcomm!=comm) { 2002 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2003 } 2004 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2005 /* get the local target indices */ 2006 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2007 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2008 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2009 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2010 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2011 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2012 /* the column permutation is so much easier; 2013 make a local version of 'colp' and invert it */ 2014 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2015 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2016 if (size==1) { 2017 lcolp = colp; 2018 } else { 2019 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2020 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2021 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2022 } 2023 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2024 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2025 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2026 if (size>1) { 2027 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2028 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2029 } 2030 /* now we just get the submatrix */ 2031 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2032 /* clean up */ 2033 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2034 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2040 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2041 { 2042 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2043 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2044 2045 PetscFunctionBegin; 2046 if (nghosts) { *nghosts = B->nbs;} 2047 if (ghosts) {*ghosts = baij->garray;} 2048 PetscFunctionReturn(0); 2049 } 2050 2051 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2052 2053 #undef __FUNCT__ 2054 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2055 /* 2056 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2057 */ 2058 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2059 { 2060 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2061 PetscErrorCode ierr; 2062 PetscMPIInt size,*ncolsonproc,*disp,nn; 2063 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2064 const PetscInt *is; 2065 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2066 PetscInt *rowhit,M,cstart,cend,colb; 2067 PetscInt *columnsforrow,l; 2068 IS *isa; 2069 PetscTruth done,flg; 2070 ISLocalToGlobalMapping map = mat->bmapping; 2071 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2072 2073 PetscFunctionBegin; 2074 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2075 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2076 2077 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2078 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2079 M = mat->rmap->n/bs; 2080 cstart = mat->cmap->rstart/bs; 2081 cend = mat->cmap->rend/bs; 2082 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2083 c->N = mat->cmap->N/bs; 2084 c->m = mat->rmap->n/bs; 2085 c->rstart = mat->rmap->rstart/bs; 2086 2087 c->ncolors = nis; 2088 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2089 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2090 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2091 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2092 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2093 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2094 2095 /* Allow access to data structures of local part of matrix */ 2096 if (!baij->colmap) { 2097 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2098 } 2099 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2100 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2101 2102 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2103 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2104 2105 for (i=0; i<nis; i++) { 2106 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2107 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2108 c->ncolumns[i] = n; 2109 if (n) { 2110 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2111 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2112 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2113 } else { 2114 c->columns[i] = 0; 2115 } 2116 2117 if (ctype == IS_COLORING_GLOBAL){ 2118 /* Determine the total (parallel) number of columns of this color */ 2119 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2120 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2121 2122 nn = PetscMPIIntCast(n); 2123 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2124 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2125 if (!nctot) { 2126 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2127 } 2128 2129 disp[0] = 0; 2130 for (j=1; j<size; j++) { 2131 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2132 } 2133 2134 /* Get complete list of columns for color on each processor */ 2135 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2136 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2137 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2138 } else if (ctype == IS_COLORING_GHOSTED){ 2139 /* Determine local number of columns of this color on this process, including ghost points */ 2140 nctot = n; 2141 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2142 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2143 } else { 2144 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2145 } 2146 2147 /* 2148 Mark all rows affect by these columns 2149 */ 2150 /* Temporary option to allow for debugging/testing */ 2151 flg = PETSC_FALSE; 2152 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2153 if (!flg) {/*-----------------------------------------------------------------------------*/ 2154 /* crude, fast version */ 2155 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2156 /* loop over columns*/ 2157 for (j=0; j<nctot; j++) { 2158 if (ctype == IS_COLORING_GHOSTED) { 2159 col = ltog[cols[j]]; 2160 } else { 2161 col = cols[j]; 2162 } 2163 if (col >= cstart && col < cend) { 2164 /* column is in diagonal block of matrix */ 2165 rows = A_cj + A_ci[col-cstart]; 2166 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2167 } else { 2168 #if defined (PETSC_USE_CTABLE) 2169 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2170 colb --; 2171 #else 2172 colb = baij->colmap[col] - 1; 2173 #endif 2174 if (colb == -1) { 2175 m = 0; 2176 } else { 2177 colb = colb/bs; 2178 rows = B_cj + B_ci[colb]; 2179 m = B_ci[colb+1] - B_ci[colb]; 2180 } 2181 } 2182 /* loop over columns marking them in rowhit */ 2183 for (k=0; k<m; k++) { 2184 rowhit[*rows++] = col + 1; 2185 } 2186 } 2187 2188 /* count the number of hits */ 2189 nrows = 0; 2190 for (j=0; j<M; j++) { 2191 if (rowhit[j]) nrows++; 2192 } 2193 c->nrows[i] = nrows; 2194 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2195 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2196 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2197 nrows = 0; 2198 for (j=0; j<M; j++) { 2199 if (rowhit[j]) { 2200 c->rows[i][nrows] = j; 2201 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2202 nrows++; 2203 } 2204 } 2205 } else {/*-------------------------------------------------------------------------------*/ 2206 /* slow version, using rowhit as a linked list */ 2207 PetscInt currentcol,fm,mfm; 2208 rowhit[M] = M; 2209 nrows = 0; 2210 /* loop over columns*/ 2211 for (j=0; j<nctot; j++) { 2212 if (ctype == IS_COLORING_GHOSTED) { 2213 col = ltog[cols[j]]; 2214 } else { 2215 col = cols[j]; 2216 } 2217 if (col >= cstart && col < cend) { 2218 /* column is in diagonal block of matrix */ 2219 rows = A_cj + A_ci[col-cstart]; 2220 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2221 } else { 2222 #if defined (PETSC_USE_CTABLE) 2223 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2224 colb --; 2225 #else 2226 colb = baij->colmap[col] - 1; 2227 #endif 2228 if (colb == -1) { 2229 m = 0; 2230 } else { 2231 colb = colb/bs; 2232 rows = B_cj + B_ci[colb]; 2233 m = B_ci[colb+1] - B_ci[colb]; 2234 } 2235 } 2236 2237 /* loop over columns marking them in rowhit */ 2238 fm = M; /* fm points to first entry in linked list */ 2239 for (k=0; k<m; k++) { 2240 currentcol = *rows++; 2241 /* is it already in the list? */ 2242 do { 2243 mfm = fm; 2244 fm = rowhit[fm]; 2245 } while (fm < currentcol); 2246 /* not in list so add it */ 2247 if (fm != currentcol) { 2248 nrows++; 2249 columnsforrow[currentcol] = col; 2250 /* next three lines insert new entry into linked list */ 2251 rowhit[mfm] = currentcol; 2252 rowhit[currentcol] = fm; 2253 fm = currentcol; 2254 /* fm points to present position in list since we know the columns are sorted */ 2255 } else { 2256 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2257 } 2258 } 2259 } 2260 c->nrows[i] = nrows; 2261 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2262 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2263 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2264 /* now store the linked list of rows into c->rows[i] */ 2265 nrows = 0; 2266 fm = rowhit[M]; 2267 do { 2268 c->rows[i][nrows] = fm; 2269 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2270 fm = rowhit[fm]; 2271 } while (fm < M); 2272 } /* ---------------------------------------------------------------------------------------*/ 2273 ierr = PetscFree(cols);CHKERRQ(ierr); 2274 } 2275 2276 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2277 /* 2278 vscale will contain the "diagonal" on processor scalings followed by the off processor 2279 */ 2280 if (ctype == IS_COLORING_GLOBAL) { 2281 PetscInt *garray; 2282 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2283 for (i=0; i<baij->B->cmap->n/bs; i++) { 2284 for (j=0; j<bs; j++) { 2285 garray[i*bs+j] = bs*baij->garray[i]+j; 2286 } 2287 } 2288 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2289 ierr = PetscFree(garray);CHKERRQ(ierr); 2290 CHKMEMQ; 2291 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2292 for (k=0; k<c->ncolors; k++) { 2293 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2294 for (l=0; l<c->nrows[k]; l++) { 2295 col = c->columnsforrow[k][l]; 2296 if (col >= cstart && col < cend) { 2297 /* column is in diagonal block of matrix */ 2298 colb = col - cstart; 2299 } else { 2300 /* column is in "off-processor" part */ 2301 #if defined (PETSC_USE_CTABLE) 2302 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2303 colb --; 2304 #else 2305 colb = baij->colmap[col] - 1; 2306 #endif 2307 colb = colb/bs; 2308 colb += cend - cstart; 2309 } 2310 c->vscaleforrow[k][l] = colb; 2311 } 2312 } 2313 } else if (ctype == IS_COLORING_GHOSTED) { 2314 /* Get gtol mapping */ 2315 PetscInt N = mat->cmap->N, *gtol; 2316 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2317 for (i=0; i<N; i++) gtol[i] = -1; 2318 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2319 2320 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2321 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2322 for (k=0; k<c->ncolors; k++) { 2323 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2324 for (l=0; l<c->nrows[k]; l++) { 2325 col = c->columnsforrow[k][l]; /* global column index */ 2326 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2327 } 2328 } 2329 ierr = PetscFree(gtol);CHKERRQ(ierr); 2330 } 2331 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2332 2333 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2334 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2335 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2336 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2337 CHKMEMQ; 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2343 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2344 { 2345 Mat B; 2346 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2347 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2348 Mat_SeqAIJ *b; 2349 PetscErrorCode ierr; 2350 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2351 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2352 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2353 2354 PetscFunctionBegin; 2355 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2356 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2357 2358 /* ---------------------------------------------------------------- 2359 Tell every processor the number of nonzeros per row 2360 */ 2361 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2362 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2363 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2364 } 2365 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2366 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2367 displs = recvcounts + size; 2368 for (i=0; i<size; i++) { 2369 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2370 displs[i] = A->rmap->range[i]/bs; 2371 } 2372 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2373 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2374 #else 2375 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2376 #endif 2377 /* --------------------------------------------------------------- 2378 Create the sequential matrix of the same type as the local block diagonal 2379 */ 2380 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2381 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2382 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2383 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2384 b = (Mat_SeqAIJ *)B->data; 2385 2386 /*-------------------------------------------------------------------- 2387 Copy my part of matrix column indices over 2388 */ 2389 sendcount = ad->nz + bd->nz; 2390 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2391 a_jsendbuf = ad->j; 2392 b_jsendbuf = bd->j; 2393 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2394 cnt = 0; 2395 for (i=0; i<n; i++) { 2396 2397 /* put in lower diagonal portion */ 2398 m = bd->i[i+1] - bd->i[i]; 2399 while (m > 0) { 2400 /* is it above diagonal (in bd (compressed) numbering) */ 2401 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2402 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2403 m--; 2404 } 2405 2406 /* put in diagonal portion */ 2407 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2408 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2409 } 2410 2411 /* put in upper diagonal portion */ 2412 while (m-- > 0) { 2413 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2414 } 2415 } 2416 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2417 2418 /*-------------------------------------------------------------------- 2419 Gather all column indices to all processors 2420 */ 2421 for (i=0; i<size; i++) { 2422 recvcounts[i] = 0; 2423 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2424 recvcounts[i] += lens[j]; 2425 } 2426 } 2427 displs[0] = 0; 2428 for (i=1; i<size; i++) { 2429 displs[i] = displs[i-1] + recvcounts[i-1]; 2430 } 2431 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2432 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2433 #else 2434 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2435 #endif 2436 /*-------------------------------------------------------------------- 2437 Assemble the matrix into useable form (note numerical values not yet set) 2438 */ 2439 /* set the b->ilen (length of each row) values */ 2440 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2441 /* set the b->i indices */ 2442 b->i[0] = 0; 2443 for (i=1; i<=A->rmap->N/bs; i++) { 2444 b->i[i] = b->i[i-1] + lens[i-1]; 2445 } 2446 ierr = PetscFree(lens);CHKERRQ(ierr); 2447 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2450 2451 if (A->symmetric){ 2452 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2453 } else if (A->hermitian) { 2454 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2455 } else if (A->structurally_symmetric) { 2456 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2457 } 2458 *newmat = B; 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatSOR_MPIBAIJ" 2464 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2465 { 2466 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2467 PetscErrorCode ierr; 2468 Vec bb1 = 0; 2469 2470 PetscFunctionBegin; 2471 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2472 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2473 } 2474 2475 if (flag == SOR_APPLY_UPPER) { 2476 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2481 if (flag & SOR_ZERO_INITIAL_GUESS) { 2482 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2483 its--; 2484 } 2485 2486 while (its--) { 2487 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2488 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2489 2490 /* update rhs: bb1 = bb - B*x */ 2491 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2492 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2493 2494 /* local sweep */ 2495 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2496 } 2497 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2498 if (flag & SOR_ZERO_INITIAL_GUESS) { 2499 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2500 its--; 2501 } 2502 while (its--) { 2503 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2504 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2505 2506 /* update rhs: bb1 = bb - B*x */ 2507 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2508 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2509 2510 /* local sweep */ 2511 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2512 } 2513 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2514 if (flag & SOR_ZERO_INITIAL_GUESS) { 2515 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2516 its--; 2517 } 2518 while (its--) { 2519 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2520 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2521 2522 /* update rhs: bb1 = bb - B*x */ 2523 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2524 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2525 2526 /* local sweep */ 2527 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2528 } 2529 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2530 2531 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2532 PetscFunctionReturn(0); 2533 } 2534 2535 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2536 2537 2538 /* -------------------------------------------------------------------*/ 2539 static struct _MatOps MatOps_Values = { 2540 MatSetValues_MPIBAIJ, 2541 MatGetRow_MPIBAIJ, 2542 MatRestoreRow_MPIBAIJ, 2543 MatMult_MPIBAIJ, 2544 /* 4*/ MatMultAdd_MPIBAIJ, 2545 MatMultTranspose_MPIBAIJ, 2546 MatMultTransposeAdd_MPIBAIJ, 2547 0, 2548 0, 2549 0, 2550 /*10*/ 0, 2551 0, 2552 0, 2553 MatSOR_MPIBAIJ, 2554 MatTranspose_MPIBAIJ, 2555 /*15*/ MatGetInfo_MPIBAIJ, 2556 MatEqual_MPIBAIJ, 2557 MatGetDiagonal_MPIBAIJ, 2558 MatDiagonalScale_MPIBAIJ, 2559 MatNorm_MPIBAIJ, 2560 /*20*/ MatAssemblyBegin_MPIBAIJ, 2561 MatAssemblyEnd_MPIBAIJ, 2562 MatSetOption_MPIBAIJ, 2563 MatZeroEntries_MPIBAIJ, 2564 /*24*/ MatZeroRows_MPIBAIJ, 2565 0, 2566 0, 2567 0, 2568 0, 2569 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2570 0, 2571 0, 2572 0, 2573 0, 2574 /*34*/ MatDuplicate_MPIBAIJ, 2575 0, 2576 0, 2577 0, 2578 0, 2579 /*39*/ MatAXPY_MPIBAIJ, 2580 MatGetSubMatrices_MPIBAIJ, 2581 MatIncreaseOverlap_MPIBAIJ, 2582 MatGetValues_MPIBAIJ, 2583 MatCopy_MPIBAIJ, 2584 /*44*/ 0, 2585 MatScale_MPIBAIJ, 2586 0, 2587 0, 2588 0, 2589 /*49*/ MatSetBlockSize_MPIBAIJ, 2590 0, 2591 0, 2592 0, 2593 0, 2594 /*54*/ MatFDColoringCreate_MPIBAIJ, 2595 0, 2596 MatSetUnfactored_MPIBAIJ, 2597 MatPermute_MPIBAIJ, 2598 MatSetValuesBlocked_MPIBAIJ, 2599 /*59*/ MatGetSubMatrix_MPIBAIJ, 2600 MatDestroy_MPIBAIJ, 2601 MatView_MPIBAIJ, 2602 0, 2603 0, 2604 /*64*/ 0, 2605 0, 2606 0, 2607 0, 2608 0, 2609 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*74*/ 0, 2615 MatFDColoringApply_BAIJ, 2616 0, 2617 0, 2618 0, 2619 /*79*/ 0, 2620 0, 2621 0, 2622 0, 2623 MatLoad_MPIBAIJ, 2624 /*84*/ 0, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*89*/ 0, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*94*/ 0, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*99*/ 0, 2640 0, 2641 0, 2642 0, 2643 0, 2644 /*104*/0, 2645 MatRealPart_MPIBAIJ, 2646 MatImaginaryPart_MPIBAIJ, 2647 0, 2648 0, 2649 /*109*/0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2655 0, 2656 MatGetGhosts_MPIBAIJ, 2657 0, 2658 0, 2659 /*119*/0, 2660 0, 2661 0, 2662 0 2663 }; 2664 2665 EXTERN_C_BEGIN 2666 #undef __FUNCT__ 2667 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2668 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2669 { 2670 PetscFunctionBegin; 2671 *a = ((Mat_MPIBAIJ *)A->data)->A; 2672 *iscopy = PETSC_FALSE; 2673 PetscFunctionReturn(0); 2674 } 2675 EXTERN_C_END 2676 2677 EXTERN_C_BEGIN 2678 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2679 EXTERN_C_END 2680 2681 EXTERN_C_BEGIN 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2684 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2685 { 2686 PetscInt m,rstart,cstart,cend; 2687 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2688 const PetscInt *JJ=0; 2689 PetscScalar *values=0; 2690 PetscErrorCode ierr; 2691 2692 PetscFunctionBegin; 2693 2694 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2695 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2696 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2697 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2698 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2699 m = B->rmap->n/bs; 2700 rstart = B->rmap->rstart/bs; 2701 cstart = B->cmap->rstart/bs; 2702 cend = B->cmap->rend/bs; 2703 2704 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2705 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2706 for (i=0; i<m; i++) { 2707 nz = ii[i+1] - ii[i]; 2708 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2709 nz_max = PetscMax(nz_max,nz); 2710 JJ = jj + ii[i]; 2711 for (j=0; j<nz; j++) { 2712 if (*JJ >= cstart) break; 2713 JJ++; 2714 } 2715 d = 0; 2716 for (; j<nz; j++) { 2717 if (*JJ++ >= cend) break; 2718 d++; 2719 } 2720 d_nnz[i] = d; 2721 o_nnz[i] = nz - d; 2722 } 2723 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2724 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2725 2726 values = (PetscScalar*)V; 2727 if (!values) { 2728 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2729 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2730 } 2731 for (i=0; i<m; i++) { 2732 PetscInt row = i + rstart; 2733 PetscInt ncols = ii[i+1] - ii[i]; 2734 const PetscInt *icols = jj + ii[i]; 2735 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2736 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2737 } 2738 2739 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2740 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2741 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2742 2743 PetscFunctionReturn(0); 2744 } 2745 EXTERN_C_END 2746 2747 #undef __FUNCT__ 2748 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2749 /*@C 2750 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2751 (the default parallel PETSc format). 2752 2753 Collective on MPI_Comm 2754 2755 Input Parameters: 2756 + A - the matrix 2757 . i - the indices into j for the start of each local row (starts with zero) 2758 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2759 - v - optional values in the matrix 2760 2761 Level: developer 2762 2763 .keywords: matrix, aij, compressed row, sparse, parallel 2764 2765 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2766 @*/ 2767 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2768 { 2769 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2770 2771 PetscFunctionBegin; 2772 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2773 if (f) { 2774 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2775 } 2776 PetscFunctionReturn(0); 2777 } 2778 2779 EXTERN_C_BEGIN 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2782 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2783 { 2784 Mat_MPIBAIJ *b; 2785 PetscErrorCode ierr; 2786 PetscInt i, newbs = PetscAbs(bs); 2787 2788 PetscFunctionBegin; 2789 if (bs < 0) { 2790 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2791 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2792 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2793 bs = PetscAbs(bs); 2794 } 2795 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2796 bs = newbs; 2797 2798 2799 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2800 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2801 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2802 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2803 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2804 2805 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2806 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2807 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2808 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2809 2810 if (d_nnz) { 2811 for (i=0; i<B->rmap->n/bs; i++) { 2812 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2813 } 2814 } 2815 if (o_nnz) { 2816 for (i=0; i<B->rmap->n/bs; i++) { 2817 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2818 } 2819 } 2820 2821 b = (Mat_MPIBAIJ*)B->data; 2822 b->bs2 = bs*bs; 2823 b->mbs = B->rmap->n/bs; 2824 b->nbs = B->cmap->n/bs; 2825 b->Mbs = B->rmap->N/bs; 2826 b->Nbs = B->cmap->N/bs; 2827 2828 for (i=0; i<=b->size; i++) { 2829 b->rangebs[i] = B->rmap->range[i]/bs; 2830 } 2831 b->rstartbs = B->rmap->rstart/bs; 2832 b->rendbs = B->rmap->rend/bs; 2833 b->cstartbs = B->cmap->rstart/bs; 2834 b->cendbs = B->cmap->rend/bs; 2835 2836 if (!B->preallocated) { 2837 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2838 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2839 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2840 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2841 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2842 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2843 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2844 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2845 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2846 } 2847 2848 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2849 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2850 B->preallocated = PETSC_TRUE; 2851 PetscFunctionReturn(0); 2852 } 2853 EXTERN_C_END 2854 2855 EXTERN_C_BEGIN 2856 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2858 EXTERN_C_END 2859 2860 2861 EXTERN_C_BEGIN 2862 #undef __FUNCT__ 2863 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2864 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2865 { 2866 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2867 PetscErrorCode ierr; 2868 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2869 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2870 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2871 2872 PetscFunctionBegin; 2873 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2874 ii[0] = 0; 2875 CHKMEMQ; 2876 for (i=0; i<M; i++) { 2877 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2878 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2879 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2880 /* remove one from count of matrix has diagonal */ 2881 for (j=id[i]; j<id[i+1]; j++) { 2882 if (jd[j] == i) {ii[i+1]--;break;} 2883 } 2884 CHKMEMQ; 2885 } 2886 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2887 cnt = 0; 2888 for (i=0; i<M; i++) { 2889 for (j=io[i]; j<io[i+1]; j++) { 2890 if (garray[jo[j]] > rstart) break; 2891 jj[cnt++] = garray[jo[j]]; 2892 CHKMEMQ; 2893 } 2894 for (k=id[i]; k<id[i+1]; k++) { 2895 if (jd[k] != i) { 2896 jj[cnt++] = rstart + jd[k]; 2897 CHKMEMQ; 2898 } 2899 } 2900 for (;j<io[i+1]; j++) { 2901 jj[cnt++] = garray[jo[j]]; 2902 CHKMEMQ; 2903 } 2904 } 2905 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2906 PetscFunctionReturn(0); 2907 } 2908 EXTERN_C_END 2909 2910 EXTERN_C_BEGIN 2911 #if defined(PETSC_HAVE_MUMPS) 2912 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2913 #endif 2914 EXTERN_C_END 2915 2916 /*MC 2917 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2918 2919 Options Database Keys: 2920 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2921 . -mat_block_size <bs> - set the blocksize used to store the matrix 2922 - -mat_use_hash_table <fact> 2923 2924 Level: beginner 2925 2926 .seealso: MatCreateMPIBAIJ 2927 M*/ 2928 2929 EXTERN_C_BEGIN 2930 #undef __FUNCT__ 2931 #define __FUNCT__ "MatCreate_MPIBAIJ" 2932 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2933 { 2934 Mat_MPIBAIJ *b; 2935 PetscErrorCode ierr; 2936 PetscTruth flg; 2937 2938 PetscFunctionBegin; 2939 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2940 B->data = (void*)b; 2941 2942 2943 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2944 B->mapping = 0; 2945 B->assembled = PETSC_FALSE; 2946 2947 B->insertmode = NOT_SET_VALUES; 2948 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2949 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2950 2951 /* build local table of row and column ownerships */ 2952 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2953 2954 /* build cache for off array entries formed */ 2955 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2956 b->donotstash = PETSC_FALSE; 2957 b->colmap = PETSC_NULL; 2958 b->garray = PETSC_NULL; 2959 b->roworiented = PETSC_TRUE; 2960 2961 /* stuff used in block assembly */ 2962 b->barray = 0; 2963 2964 /* stuff used for matrix vector multiply */ 2965 b->lvec = 0; 2966 b->Mvctx = 0; 2967 2968 /* stuff for MatGetRow() */ 2969 b->rowindices = 0; 2970 b->rowvalues = 0; 2971 b->getrowactive = PETSC_FALSE; 2972 2973 /* hash table stuff */ 2974 b->ht = 0; 2975 b->hd = 0; 2976 b->ht_size = 0; 2977 b->ht_flag = PETSC_FALSE; 2978 b->ht_fact = 0; 2979 b->ht_total_ct = 0; 2980 b->ht_insert_ct = 0; 2981 2982 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2983 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2984 if (flg) { 2985 PetscReal fact = 1.39; 2986 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2987 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2988 if (fact <= 1.0) fact = 1.39; 2989 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2990 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2991 } 2992 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2993 2994 #if defined(PETSC_HAVE_MUMPS) 2995 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 2996 #endif 2997 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 2998 "MatConvert_MPIBAIJ_MPIAdj", 2999 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3000 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3001 "MatStoreValues_MPIBAIJ", 3002 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3003 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3004 "MatRetrieveValues_MPIBAIJ", 3005 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3006 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3007 "MatGetDiagonalBlock_MPIBAIJ", 3008 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3009 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3010 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3011 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3013 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3014 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3015 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3016 "MatDiagonalScaleLocal_MPIBAIJ", 3017 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3018 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3019 "MatSetHashTableFactor_MPIBAIJ", 3020 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3021 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3022 PetscFunctionReturn(0); 3023 } 3024 EXTERN_C_END 3025 3026 /*MC 3027 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3028 3029 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3030 and MATMPIBAIJ otherwise. 3031 3032 Options Database Keys: 3033 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3034 3035 Level: beginner 3036 3037 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3038 M*/ 3039 3040 EXTERN_C_BEGIN 3041 #undef __FUNCT__ 3042 #define __FUNCT__ "MatCreate_BAIJ" 3043 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3044 { 3045 PetscErrorCode ierr; 3046 PetscMPIInt size; 3047 3048 PetscFunctionBegin; 3049 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3050 if (size == 1) { 3051 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3052 } else { 3053 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3054 } 3055 PetscFunctionReturn(0); 3056 } 3057 EXTERN_C_END 3058 3059 #undef __FUNCT__ 3060 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3061 /*@C 3062 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3063 (block compressed row). For good matrix assembly performance 3064 the user should preallocate the matrix storage by setting the parameters 3065 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3066 performance can be increased by more than a factor of 50. 3067 3068 Collective on Mat 3069 3070 Input Parameters: 3071 + A - the matrix 3072 . bs - size of blockk 3073 . d_nz - number of block nonzeros per block row in diagonal portion of local 3074 submatrix (same for all local rows) 3075 . d_nnz - array containing the number of block nonzeros in the various block rows 3076 of the in diagonal portion of the local (possibly different for each block 3077 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3078 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3079 submatrix (same for all local rows). 3080 - o_nnz - array containing the number of nonzeros in the various block rows of the 3081 off-diagonal portion of the local submatrix (possibly different for 3082 each block row) or PETSC_NULL. 3083 3084 If the *_nnz parameter is given then the *_nz parameter is ignored 3085 3086 Options Database Keys: 3087 + -mat_block_size - size of the blocks to use 3088 - -mat_use_hash_table <fact> 3089 3090 Notes: 3091 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3092 than it must be used on all processors that share the object for that argument. 3093 3094 Storage Information: 3095 For a square global matrix we define each processor's diagonal portion 3096 to be its local rows and the corresponding columns (a square submatrix); 3097 each processor's off-diagonal portion encompasses the remainder of the 3098 local matrix (a rectangular submatrix). 3099 3100 The user can specify preallocated storage for the diagonal part of 3101 the local submatrix with either d_nz or d_nnz (not both). Set 3102 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3103 memory allocation. Likewise, specify preallocated storage for the 3104 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3105 3106 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3107 the figure below we depict these three local rows and all columns (0-11). 3108 3109 .vb 3110 0 1 2 3 4 5 6 7 8 9 10 11 3111 ------------------- 3112 row 3 | o o o d d d o o o o o o 3113 row 4 | o o o d d d o o o o o o 3114 row 5 | o o o d d d o o o o o o 3115 ------------------- 3116 .ve 3117 3118 Thus, any entries in the d locations are stored in the d (diagonal) 3119 submatrix, and any entries in the o locations are stored in the 3120 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3121 stored simply in the MATSEQBAIJ format for compressed row storage. 3122 3123 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3124 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3125 In general, for PDE problems in which most nonzeros are near the diagonal, 3126 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3127 or you will get TERRIBLE performance; see the users' manual chapter on 3128 matrices. 3129 3130 You can call MatGetInfo() to get information on how effective the preallocation was; 3131 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3132 You can also run with the option -info and look for messages with the string 3133 malloc in them to see if additional memory allocation was needed. 3134 3135 Level: intermediate 3136 3137 .keywords: matrix, block, aij, compressed row, sparse, parallel 3138 3139 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3140 @*/ 3141 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3142 { 3143 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3144 3145 PetscFunctionBegin; 3146 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3147 if (f) { 3148 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3149 } 3150 PetscFunctionReturn(0); 3151 } 3152 3153 #undef __FUNCT__ 3154 #define __FUNCT__ "MatCreateMPIBAIJ" 3155 /*@C 3156 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3157 (block compressed row). For good matrix assembly performance 3158 the user should preallocate the matrix storage by setting the parameters 3159 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3160 performance can be increased by more than a factor of 50. 3161 3162 Collective on MPI_Comm 3163 3164 Input Parameters: 3165 + comm - MPI communicator 3166 . bs - size of blockk 3167 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3168 This value should be the same as the local size used in creating the 3169 y vector for the matrix-vector product y = Ax. 3170 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3171 This value should be the same as the local size used in creating the 3172 x vector for the matrix-vector product y = Ax. 3173 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3174 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3175 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3176 submatrix (same for all local rows) 3177 . d_nnz - array containing the number of nonzero blocks in the various block rows 3178 of the in diagonal portion of the local (possibly different for each block 3179 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3180 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3181 submatrix (same for all local rows). 3182 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3183 off-diagonal portion of the local submatrix (possibly different for 3184 each block row) or PETSC_NULL. 3185 3186 Output Parameter: 3187 . A - the matrix 3188 3189 Options Database Keys: 3190 + -mat_block_size - size of the blocks to use 3191 - -mat_use_hash_table <fact> 3192 3193 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3194 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3195 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3196 3197 Notes: 3198 If the *_nnz parameter is given then the *_nz parameter is ignored 3199 3200 A nonzero block is any block that as 1 or more nonzeros in it 3201 3202 The user MUST specify either the local or global matrix dimensions 3203 (possibly both). 3204 3205 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3206 than it must be used on all processors that share the object for that argument. 3207 3208 Storage Information: 3209 For a square global matrix we define each processor's diagonal portion 3210 to be its local rows and the corresponding columns (a square submatrix); 3211 each processor's off-diagonal portion encompasses the remainder of the 3212 local matrix (a rectangular submatrix). 3213 3214 The user can specify preallocated storage for the diagonal part of 3215 the local submatrix with either d_nz or d_nnz (not both). Set 3216 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3217 memory allocation. Likewise, specify preallocated storage for the 3218 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3219 3220 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3221 the figure below we depict these three local rows and all columns (0-11). 3222 3223 .vb 3224 0 1 2 3 4 5 6 7 8 9 10 11 3225 ------------------- 3226 row 3 | o o o d d d o o o o o o 3227 row 4 | o o o d d d o o o o o o 3228 row 5 | o o o d d d o o o o o o 3229 ------------------- 3230 .ve 3231 3232 Thus, any entries in the d locations are stored in the d (diagonal) 3233 submatrix, and any entries in the o locations are stored in the 3234 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3235 stored simply in the MATSEQBAIJ format for compressed row storage. 3236 3237 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3238 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3239 In general, for PDE problems in which most nonzeros are near the diagonal, 3240 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3241 or you will get TERRIBLE performance; see the users' manual chapter on 3242 matrices. 3243 3244 Level: intermediate 3245 3246 .keywords: matrix, block, aij, compressed row, sparse, parallel 3247 3248 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3249 @*/ 3250 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3251 { 3252 PetscErrorCode ierr; 3253 PetscMPIInt size; 3254 3255 PetscFunctionBegin; 3256 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3257 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3258 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3259 if (size > 1) { 3260 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3261 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3262 } else { 3263 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3264 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3265 } 3266 PetscFunctionReturn(0); 3267 } 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3271 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3272 { 3273 Mat mat; 3274 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3275 PetscErrorCode ierr; 3276 PetscInt len=0; 3277 3278 PetscFunctionBegin; 3279 *newmat = 0; 3280 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3281 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3282 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3283 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3284 3285 mat->factortype = matin->factortype; 3286 mat->preallocated = PETSC_TRUE; 3287 mat->assembled = PETSC_TRUE; 3288 mat->insertmode = NOT_SET_VALUES; 3289 3290 a = (Mat_MPIBAIJ*)mat->data; 3291 mat->rmap->bs = matin->rmap->bs; 3292 a->bs2 = oldmat->bs2; 3293 a->mbs = oldmat->mbs; 3294 a->nbs = oldmat->nbs; 3295 a->Mbs = oldmat->Mbs; 3296 a->Nbs = oldmat->Nbs; 3297 3298 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3299 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3300 3301 a->size = oldmat->size; 3302 a->rank = oldmat->rank; 3303 a->donotstash = oldmat->donotstash; 3304 a->roworiented = oldmat->roworiented; 3305 a->rowindices = 0; 3306 a->rowvalues = 0; 3307 a->getrowactive = PETSC_FALSE; 3308 a->barray = 0; 3309 a->rstartbs = oldmat->rstartbs; 3310 a->rendbs = oldmat->rendbs; 3311 a->cstartbs = oldmat->cstartbs; 3312 a->cendbs = oldmat->cendbs; 3313 3314 /* hash table stuff */ 3315 a->ht = 0; 3316 a->hd = 0; 3317 a->ht_size = 0; 3318 a->ht_flag = oldmat->ht_flag; 3319 a->ht_fact = oldmat->ht_fact; 3320 a->ht_total_ct = 0; 3321 a->ht_insert_ct = 0; 3322 3323 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3324 if (oldmat->colmap) { 3325 #if defined (PETSC_USE_CTABLE) 3326 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3327 #else 3328 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3329 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3330 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3331 #endif 3332 } else a->colmap = 0; 3333 3334 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3335 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3336 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3337 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3338 } else a->garray = 0; 3339 3340 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3341 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3342 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3343 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3344 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3345 3346 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3347 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3348 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3349 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3350 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3351 *newmat = mat; 3352 3353 PetscFunctionReturn(0); 3354 } 3355 3356 #undef __FUNCT__ 3357 #define __FUNCT__ "MatLoad_MPIBAIJ" 3358 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, Mat newmat) 3359 { 3360 PetscErrorCode ierr; 3361 int fd; 3362 PetscInt i,nz,j,rstart,rend; 3363 PetscScalar *vals,*buf; 3364 MPI_Comm comm = ((PetscObject)viewer)->comm; 3365 MPI_Status status; 3366 PetscMPIInt rank,size,maxnz; 3367 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3368 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3369 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3370 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3371 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3372 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3373 3374 PetscFunctionBegin; 3375 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3376 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3377 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3378 3379 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3380 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3381 if (!rank) { 3382 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3383 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3384 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3385 } 3386 3387 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3388 3389 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3390 M = header[1]; N = header[2]; 3391 3392 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3393 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3394 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3395 3396 /* If global sizes are set, check if they are consistent with that given in the file */ 3397 if (sizesset) { 3398 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3399 } 3400 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3401 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3402 3403 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3404 3405 /* 3406 This code adds extra rows to make sure the number of rows is 3407 divisible by the blocksize 3408 */ 3409 Mbs = M/bs; 3410 extra_rows = bs - M + bs*Mbs; 3411 if (extra_rows == bs) extra_rows = 0; 3412 else Mbs++; 3413 if (extra_rows && !rank) { 3414 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3415 } 3416 3417 /* determine ownership of all rows */ 3418 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3419 mbs = Mbs/size + ((Mbs % size) > rank); 3420 m = mbs*bs; 3421 } else { /* User set */ 3422 m = newmat->rmap->n; 3423 mbs = m/bs; 3424 } 3425 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3426 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3427 3428 /* process 0 needs enough room for process with most rows */ 3429 if (!rank) { 3430 mmax = rowners[1]; 3431 for (i=2; i<size; i++) { 3432 mmax = PetscMax(mmax,rowners[i]); 3433 } 3434 mmax*=bs; 3435 } else mmax = m; 3436 3437 rowners[0] = 0; 3438 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3439 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3440 rstart = rowners[rank]; 3441 rend = rowners[rank+1]; 3442 3443 /* distribute row lengths to all processors */ 3444 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3445 if (!rank) { 3446 mend = m; 3447 if (size == 1) mend = mend - extra_rows; 3448 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3449 for (j=mend; j<m; j++) locrowlens[j] = 1; 3450 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3451 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3452 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3453 for (j=0; j<m; j++) { 3454 procsnz[0] += locrowlens[j]; 3455 } 3456 for (i=1; i<size; i++) { 3457 mend = browners[i+1] - browners[i]; 3458 if (i == size-1) mend = mend - extra_rows; 3459 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3460 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3461 /* calculate the number of nonzeros on each processor */ 3462 for (j=0; j<browners[i+1]-browners[i]; j++) { 3463 procsnz[i] += rowlengths[j]; 3464 } 3465 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3466 } 3467 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3468 } else { 3469 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3470 } 3471 3472 if (!rank) { 3473 /* determine max buffer needed and allocate it */ 3474 maxnz = procsnz[0]; 3475 for (i=1; i<size; i++) { 3476 maxnz = PetscMax(maxnz,procsnz[i]); 3477 } 3478 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3479 3480 /* read in my part of the matrix column indices */ 3481 nz = procsnz[0]; 3482 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3483 mycols = ibuf; 3484 if (size == 1) nz -= extra_rows; 3485 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3486 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3487 3488 /* read in every ones (except the last) and ship off */ 3489 for (i=1; i<size-1; i++) { 3490 nz = procsnz[i]; 3491 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3492 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3493 } 3494 /* read in the stuff for the last proc */ 3495 if (size != 1) { 3496 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3497 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3498 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3499 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3500 } 3501 ierr = PetscFree(cols);CHKERRQ(ierr); 3502 } else { 3503 /* determine buffer space needed for message */ 3504 nz = 0; 3505 for (i=0; i<m; i++) { 3506 nz += locrowlens[i]; 3507 } 3508 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3509 mycols = ibuf; 3510 /* receive message of column indices*/ 3511 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3512 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3513 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3514 } 3515 3516 /* loop over local rows, determining number of off diagonal entries */ 3517 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3518 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3519 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3520 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3521 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3522 rowcount = 0; nzcount = 0; 3523 for (i=0; i<mbs; i++) { 3524 dcount = 0; 3525 odcount = 0; 3526 for (j=0; j<bs; j++) { 3527 kmax = locrowlens[rowcount]; 3528 for (k=0; k<kmax; k++) { 3529 tmp = mycols[nzcount++]/bs; 3530 if (!mask[tmp]) { 3531 mask[tmp] = 1; 3532 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3533 else masked1[dcount++] = tmp; 3534 } 3535 } 3536 rowcount++; 3537 } 3538 3539 dlens[i] = dcount; 3540 odlens[i] = odcount; 3541 3542 /* zero out the mask elements we set */ 3543 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3544 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3545 } 3546 3547 3548 if (!sizesset) { 3549 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3550 } 3551 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3552 3553 if (!rank) { 3554 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3555 /* read in my part of the matrix numerical values */ 3556 nz = procsnz[0]; 3557 vals = buf; 3558 mycols = ibuf; 3559 if (size == 1) nz -= extra_rows; 3560 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3561 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3562 3563 /* insert into matrix */ 3564 jj = rstart*bs; 3565 for (i=0; i<m; i++) { 3566 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3567 mycols += locrowlens[i]; 3568 vals += locrowlens[i]; 3569 jj++; 3570 } 3571 /* read in other processors (except the last one) and ship out */ 3572 for (i=1; i<size-1; i++) { 3573 nz = procsnz[i]; 3574 vals = buf; 3575 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3576 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3577 } 3578 /* the last proc */ 3579 if (size != 1){ 3580 nz = procsnz[i] - extra_rows; 3581 vals = buf; 3582 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3583 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3584 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3585 } 3586 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3587 } else { 3588 /* receive numeric values */ 3589 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3590 3591 /* receive message of values*/ 3592 vals = buf; 3593 mycols = ibuf; 3594 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3595 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3596 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3597 3598 /* insert into matrix */ 3599 jj = rstart*bs; 3600 for (i=0; i<m; i++) { 3601 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3602 mycols += locrowlens[i]; 3603 vals += locrowlens[i]; 3604 jj++; 3605 } 3606 } 3607 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3608 ierr = PetscFree(buf);CHKERRQ(ierr); 3609 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3610 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3611 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3612 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3613 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3614 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3615 3616 PetscFunctionReturn(0); 3617 } 3618 3619 #undef __FUNCT__ 3620 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3621 /*@ 3622 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3623 3624 Input Parameters: 3625 . mat - the matrix 3626 . fact - factor 3627 3628 Collective on Mat 3629 3630 Level: advanced 3631 3632 Notes: 3633 This can also be set by the command line option: -mat_use_hash_table <fact> 3634 3635 .keywords: matrix, hashtable, factor, HT 3636 3637 .seealso: MatSetOption() 3638 @*/ 3639 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3640 { 3641 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3642 3643 PetscFunctionBegin; 3644 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3645 if (f) { 3646 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3647 } 3648 PetscFunctionReturn(0); 3649 } 3650 3651 EXTERN_C_BEGIN 3652 #undef __FUNCT__ 3653 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3654 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3655 { 3656 Mat_MPIBAIJ *baij; 3657 3658 PetscFunctionBegin; 3659 baij = (Mat_MPIBAIJ*)mat->data; 3660 baij->ht_fact = fact; 3661 PetscFunctionReturn(0); 3662 } 3663 EXTERN_C_END 3664 3665 #undef __FUNCT__ 3666 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3667 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3668 { 3669 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3670 PetscFunctionBegin; 3671 *Ad = a->A; 3672 *Ao = a->B; 3673 *colmap = a->garray; 3674 PetscFunctionReturn(0); 3675 } 3676 3677 /* 3678 Special version for direct calls from Fortran (to eliminate two function call overheads 3679 */ 3680 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3681 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3682 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3683 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3684 #endif 3685 3686 #undef __FUNCT__ 3687 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3688 /*@C 3689 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3690 3691 Collective on Mat 3692 3693 Input Parameters: 3694 + mat - the matrix 3695 . min - number of input rows 3696 . im - input rows 3697 . nin - number of input columns 3698 . in - input columns 3699 . v - numerical values input 3700 - addvin - INSERT_VALUES or ADD_VALUES 3701 3702 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3703 3704 Level: advanced 3705 3706 .seealso: MatSetValuesBlocked() 3707 @*/ 3708 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3709 { 3710 /* convert input arguments to C version */ 3711 Mat mat = *matin; 3712 PetscInt m = *min, n = *nin; 3713 InsertMode addv = *addvin; 3714 3715 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3716 const MatScalar *value; 3717 MatScalar *barray=baij->barray; 3718 PetscTruth roworiented = baij->roworiented; 3719 PetscErrorCode ierr; 3720 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3721 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3722 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3723 3724 PetscFunctionBegin; 3725 /* tasks normally handled by MatSetValuesBlocked() */ 3726 if (mat->insertmode == NOT_SET_VALUES) { 3727 mat->insertmode = addv; 3728 } 3729 #if defined(PETSC_USE_DEBUG) 3730 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3731 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3732 #endif 3733 if (mat->assembled) { 3734 mat->was_assembled = PETSC_TRUE; 3735 mat->assembled = PETSC_FALSE; 3736 } 3737 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3738 3739 3740 if(!barray) { 3741 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3742 baij->barray = barray; 3743 } 3744 3745 if (roworiented) { 3746 stepval = (n-1)*bs; 3747 } else { 3748 stepval = (m-1)*bs; 3749 } 3750 for (i=0; i<m; i++) { 3751 if (im[i] < 0) continue; 3752 #if defined(PETSC_USE_DEBUG) 3753 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3754 #endif 3755 if (im[i] >= rstart && im[i] < rend) { 3756 row = im[i] - rstart; 3757 for (j=0; j<n; j++) { 3758 /* If NumCol = 1 then a copy is not required */ 3759 if ((roworiented) && (n == 1)) { 3760 barray = (MatScalar*)v + i*bs2; 3761 } else if((!roworiented) && (m == 1)) { 3762 barray = (MatScalar*)v + j*bs2; 3763 } else { /* Here a copy is required */ 3764 if (roworiented) { 3765 value = v + i*(stepval+bs)*bs + j*bs; 3766 } else { 3767 value = v + j*(stepval+bs)*bs + i*bs; 3768 } 3769 for (ii=0; ii<bs; ii++,value+=stepval) { 3770 for (jj=0; jj<bs; jj++) { 3771 *barray++ = *value++; 3772 } 3773 } 3774 barray -=bs2; 3775 } 3776 3777 if (in[j] >= cstart && in[j] < cend){ 3778 col = in[j] - cstart; 3779 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3780 } 3781 else if (in[j] < 0) continue; 3782 #if defined(PETSC_USE_DEBUG) 3783 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3784 #endif 3785 else { 3786 if (mat->was_assembled) { 3787 if (!baij->colmap) { 3788 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3789 } 3790 3791 #if defined(PETSC_USE_DEBUG) 3792 #if defined (PETSC_USE_CTABLE) 3793 { PetscInt data; 3794 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3795 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3796 } 3797 #else 3798 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3799 #endif 3800 #endif 3801 #if defined (PETSC_USE_CTABLE) 3802 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3803 col = (col - 1)/bs; 3804 #else 3805 col = (baij->colmap[in[j]] - 1)/bs; 3806 #endif 3807 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3808 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3809 col = in[j]; 3810 } 3811 } 3812 else col = in[j]; 3813 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3814 } 3815 } 3816 } else { 3817 if (!baij->donotstash) { 3818 if (roworiented) { 3819 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3820 } else { 3821 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3822 } 3823 } 3824 } 3825 } 3826 3827 /* task normally handled by MatSetValuesBlocked() */ 3828 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3829 PetscFunctionReturn(0); 3830 } 3831