#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
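/*
   A sketch of the convention established above (gcol and lcol are illustrative
   names, not identifiers used elsewhere in this file): for a global block
   column gcol that is present in the off-diagonal part, colmap holds
   bs*(local block index in baij->B) + 1, and 0 when gcol is absent, so callers
   subtract one and test for a negative result:

   #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
   #else
       lcol = baij->colmap[gcol] - 1;
   #endif
       if (lcol < 0) { / * gcol has no block in baij->B on this process * / }
*/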
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
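/*
   HASH implements multiplicative (Fibonacci) hashing: the key is scaled by the
   golden-ratio constant, only the fractional part is kept, and the result is
   mapped into [0,size).  A worked example with illustrative numbers only:
   for size = 11 and key = 7, key*HASH_KEY = 4.3262..., the fractional part is
   0.3262..., and HASH(11,7,tmp) = (PetscInt)(11*0.3262...) = 3.
*/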
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
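/*
   The hash-table SetValues path is only used when the caller opts in; a
   minimal usage sketch (A is an MPIBAIJ matrix; after the first final
   assembly the setvalues/setvaluesblocked ops are switched to the *_HT
   versions by MatAssemblyEnd_MPIBAIJ() below):

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     / * subsequent MatSetValues() calls on existing nonzeros go through the table * /
*/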
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
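/*
   Note on the matrix-vector products below: communication is overlapped with
   computation.  The scatter of the needed off-process entries of xx into
   a->lvec is started, the diagonal block a->A is applied while the messages
   are in flight, and only then is the scatter completed and the off-diagonal
   block a->B applied to the ghost values.
*/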
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this implementation
     but may not always be true for other VecScatter implementations */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
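/*
   A minimal usage sketch of the info interface implemented above (mat is an
   assembled MPIBAIJ matrix; the reduction flavor is chosen by the caller):

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, mallocs %g\n",
                        info.nz_used,info.mallocs);CHKERRQ(ierr);
*/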
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
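/*
   MatZeroRows below first routes every requested global row to the process
   that owns it (message counts are exchanged, then the row indices
   themselves), and each process then zeroes its local rows.  A minimal
   usage sketch (the row indices and diagonal value are illustrative only):

     PetscInt    rows[] = {0,5,7};
     PetscScalar diag   = 1.0;
     ierr = MatZeroRows(A,3,rows,diag);CHKERRQ(ierr);
*/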
starts[0] = 0; 1609 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1610 for (i=0; i<N; i++) { 1611 svalues[starts[owner[i]]++] = rows[i]; 1612 } 1613 1614 starts[0] = 0; 1615 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1616 count = 0; 1617 for (i=0; i<size; i++) { 1618 if (nprocs[2*i+1]) { 1619 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1620 } 1621 } 1622 ierr = PetscFree(starts);CHKERRQ(ierr); 1623 1624 base = owners[rank]; 1625 1626 /* wait on receives */ 1627 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1628 count = nrecvs; 1629 slen = 0; 1630 while (count) { 1631 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1632 /* unpack receives into our local space */ 1633 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1634 source[imdex] = recv_status.MPI_SOURCE; 1635 lens[imdex] = n; 1636 slen += n; 1637 count--; 1638 } 1639 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1640 1641 /* move the data into the send scatter */ 1642 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1643 count = 0; 1644 for (i=0; i<nrecvs; i++) { 1645 values = rvalues + i*nmax; 1646 for (j=0; j<lens[i]; j++) { 1647 lrows[count++] = values[j] - base; 1648 } 1649 } 1650 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1651 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1652 ierr = PetscFree(owner);CHKERRQ(ierr); 1653 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1654 1655 /* actually zap the local rows */ 1656 /* 1657 Zero the required rows. If the "diagonal block" of the matrix 1658 is square and the user wishes to set the diagonal we use separate 1659 code so that MatSetValues() is not called for each diagonal allocating 1660 new memory, thus calling lots of mallocs and slowing things down. 
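When diag is nonzero and the diagonal block is square, MatZeroRows_SeqBAIJ() places the diagonal entries itself; otherwise the rows are zeroed first and the diagonal entries are then inserted one at a time with MatSetValues(), followed by a reassembly.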
1661 1662 */ 1663 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1664 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1665 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1666 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1667 } else if (diag != 0.0) { 1668 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1669 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1670 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1671 for (i=0; i<slen; i++) { 1672 row = lrows[i] + rstart_bs; 1673 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1674 } 1675 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1676 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1677 } else { 1678 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1679 } 1680 1681 ierr = PetscFree(lrows);CHKERRQ(ierr); 1682 1683 /* wait on sends */ 1684 if (nsends) { 1685 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1686 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1687 ierr = PetscFree(send_status);CHKERRQ(ierr); 1688 } 1689 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1690 ierr = PetscFree(svalues);CHKERRQ(ierr); 1691 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1697 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1698 { 1699 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatEqual_MPIBAIJ" 1711 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1712 { 1713 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1714 Mat a,b,c,d; 1715 PetscTruth flg; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 a = matA->A; b = matA->B; 1720 c = matB->A; d = matB->B; 1721 1722 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1723 if (flg) { 1724 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1725 } 1726 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCopy_MPIBAIJ" 1732 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1733 { 1734 PetscErrorCode ierr; 1735 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1736 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1737 1738 PetscFunctionBegin; 1739 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
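In that case we fall back on the generic MatCopy_Basic(); otherwise the diagonal and off-diagonal blocks are copied directly below.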
*/ 1740 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1741 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1742 } else { 1743 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1744 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1751 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1752 { 1753 PetscErrorCode ierr; 1754 1755 PetscFunctionBegin; 1756 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1762 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1763 { 1764 PetscErrorCode ierr; 1765 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1766 PetscBLASInt bnz,one=1; 1767 Mat_SeqBAIJ *x,*y; 1768 1769 PetscFunctionBegin; 1770 if (str == SAME_NONZERO_PATTERN) { 1771 PetscScalar alpha = a; 1772 x = (Mat_SeqBAIJ *)xx->A->data; 1773 y = (Mat_SeqBAIJ *)yy->A->data; 1774 bnz = PetscBLASIntCast(x->nz); 1775 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1776 x = (Mat_SeqBAIJ *)xx->B->data; 1777 y = (Mat_SeqBAIJ *)yy->B->data; 1778 bnz = PetscBLASIntCast(x->nz); 1779 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1780 } else { 1781 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1782 } 1783 PetscFunctionReturn(0); 1784 } 1785 1786 #undef __FUNCT__ 1787 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1788 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1789 { 1790 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1791 PetscInt rbs,cbs; 1792 PetscErrorCode ierr; 1793 1794 PetscFunctionBegin; 1795 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1796 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1797 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1798 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1799 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1800 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1806 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1807 { 1808 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1809 PetscErrorCode ierr; 1810 1811 PetscFunctionBegin; 1812 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1813 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1819 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1820 { 1821 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1822 PetscErrorCode ierr; 1823 1824 PetscFunctionBegin; 1825 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1826 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1827 PetscFunctionReturn(0); 1828 } 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1832 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1833 { 1834 PetscErrorCode ierr; 1835 IS iscol_local; 1836 PetscInt csize; 1837 1838 PetscFunctionBegin; 1839 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1840 if (call == MAT_REUSE_MATRIX) { 1841 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1842 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 1843 } else { 1844 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1845 } 1846 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1847 if (call == MAT_INITIAL_MATRIX) { 1848 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1849 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1850 } 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1856 /* 1857 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1858 in local and then by concatenating the local matrices the end result. 1859 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1860 */ 1861 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1862 { 1863 PetscErrorCode ierr; 1864 PetscMPIInt rank,size; 1865 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1866 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1867 Mat *local,M,Mreuse; 1868 MatScalar *vwork,*aa; 1869 MPI_Comm comm = ((PetscObject)mat)->comm; 1870 Mat_SeqBAIJ *aij; 1871 1872 1873 PetscFunctionBegin; 1874 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1875 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1876 1877 if (call == MAT_REUSE_MATRIX) { 1878 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1879 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1880 local = &Mreuse; 1881 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1882 } else { 1883 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1884 Mreuse = *local; 1885 ierr = PetscFree(local);CHKERRQ(ierr); 1886 } 1887 1888 /* 1889 m - number of local rows 1890 n - number of columns (same on all processors) 1891 rstart - first row in new global matrix generated 1892 */ 1893 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1894 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1895 m = m/bs; 1896 n = n/bs; 1897 1898 if (call == MAT_INITIAL_MATRIX) { 1899 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1900 ii = aij->i; 1901 jj = aij->j; 1902 1903 /* 1904 Determine the number of non-zeros in the diagonal and off-diagonal 1905 portions of the matrix in order to do correct preallocation 1906 */ 1907 1908 /* first get start and end of "diagonal" columns */ 1909 if (csize == PETSC_DECIDE) { 1910 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1911 if (mglobal == n*bs) { /* square matrix */ 1912 nlocal = m; 1913 } else { 1914 nlocal = n/size + ((n % size) > rank); 1915 } 1916 } else { 1917 nlocal = csize/bs; 1918 } 1919 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1920 rstart = rend - nlocal; 1921 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1922 1923 /* next, compute all the lengths */ 1924 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1925 olens = dlens + m; 1926 for (i=0; i<m; i++) { 1927 jend = ii[i+1] - ii[i]; 1928 olen = 0; 1929 dlen = 0; 1930 for (j=0; j<jend; j++) { 1931 if (*jj < rstart || *jj >= rend) olen++; 1932 else dlen++; 1933 jj++; 1934 } 1935 olens[i] = olen; 1936 dlens[i] = dlen; 1937 } 1938 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1939 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 
1940 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1941 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1942 ierr = PetscFree(dlens);CHKERRQ(ierr); 1943 } else { 1944 PetscInt ml,nl; 1945 1946 M = *newmat; 1947 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1948 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1949 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1950 /* 1951 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1952 rather than the slower MatSetValues(). 1953 */ 1954 M->was_assembled = PETSC_TRUE; 1955 M->assembled = PETSC_FALSE; 1956 } 1957 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1958 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1959 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1960 ii = aij->i; 1961 jj = aij->j; 1962 aa = aij->a; 1963 for (i=0; i<m; i++) { 1964 row = rstart/bs + i; 1965 nz = ii[i+1] - ii[i]; 1966 cwork = jj; jj += nz; 1967 vwork = aa; aa += nz; 1968 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1969 } 1970 1971 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1972 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1973 *newmat = M; 1974 1975 /* save submatrix used in processor for next request */ 1976 if (call == MAT_INITIAL_MATRIX) { 1977 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1978 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1979 } 1980 1981 PetscFunctionReturn(0); 1982 } 1983 1984 #undef __FUNCT__ 1985 #define __FUNCT__ "MatPermute_MPIBAIJ" 1986 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1987 { 1988 MPI_Comm comm,pcomm; 1989 PetscInt first,local_size,nrows; 1990 const PetscInt *rows; 1991 PetscMPIInt size; 1992 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1993 PetscErrorCode ierr; 1994 1995 PetscFunctionBegin; 1996 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1997 /* make a collective version of 'rowp' */ 1998 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1999 if (pcomm==comm) { 2000 crowp = rowp; 2001 } else { 2002 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2003 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2004 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2005 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2006 } 2007 /* collect the global row permutation and invert it */ 2008 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2009 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2010 if (pcomm!=comm) { 2011 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2012 } 2013 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2014 /* get the local target indices */ 2015 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2016 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2017 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2018 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2019 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2020 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2021 /* the column permutation is so much easier; 2022 make a local version of 'colp' and invert it */ 2023 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2024 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2025 if (size==1) { 2026 lcolp = colp; 2027 } else { 2028 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2029 ierr = 
ISGetIndices(colp,&rows);CHKERRQ(ierr); 2030 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2031 } 2032 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2033 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2034 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2035 if (size>1) { 2036 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2037 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2038 } 2039 /* now we just get the submatrix */ 2040 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2041 /* clean up */ 2042 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2043 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2049 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2050 { 2051 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2052 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2053 2054 PetscFunctionBegin; 2055 if (nghosts) { *nghosts = B->nbs;} 2056 if (ghosts) {*ghosts = baij->garray;} 2057 PetscFunctionReturn(0); 2058 } 2059 2060 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2061 2062 #undef __FUNCT__ 2063 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2064 /* 2065 This routine is almost identical to its MPIAIJ counterpart, MatFDColoringCreate_MPIAIJ(). 2066 */ 2067 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2068 { 2069 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2070 PetscErrorCode ierr; 2071 PetscMPIInt size,*ncolsonproc,*disp,nn; 2072 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2073 const PetscInt *is; 2074 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2075 PetscInt *rowhit,M,cstart,cend,colb; 2076 PetscInt *columnsforrow,l; 2077 IS *isa; 2078 PetscTruth done,flg; 2079 ISLocalToGlobalMapping map = mat->bmapping; 2080 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2081 2082 PetscFunctionBegin; 2083 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2084 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2085 2086 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2087 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2088 M = mat->rmap->n/bs; 2089 cstart = mat->cmap->rstart/bs; 2090 cend = mat->cmap->rend/bs; 2091 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2092 c->N = mat->cmap->N/bs; 2093 c->m = mat->rmap->n/bs; 2094 c->rstart = mat->rmap->rstart/bs; 2095 2096 c->ncolors = nis; 2097 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2098 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2099 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2100 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2101 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2102 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2103 2104 /* Allow access to data structures of local part of matrix */ 2105 if (!baij->colmap) { 2106 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2107 } 2108 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2109 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2110 2111 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2112 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2113 2114 for (i=0; i<nis; i++) { 2115 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2116 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2117 c->ncolumns[i] = n; 2118 if (n) { 2119 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2120 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2121 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2122 } else { 2123 c->columns[i] = 0; 2124 } 2125 2126 if (ctype == IS_COLORING_GLOBAL){ 2127 /* Determine the total (parallel) number of columns of this color */ 2128 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2129 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2130 2131 nn = PetscMPIIntCast(n); 2132 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2133 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2134 if (!nctot) { 2135 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2136 } 2137 2138 disp[0] = 0; 2139 for (j=1; j<size; j++) { 2140 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2141 } 2142 2143 /* Get complete list of columns for color on each processor */ 2144 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2145 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2146 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2147 } else if (ctype == IS_COLORING_GHOSTED){ 2148 /* Determine local number of columns of this color on this process, including ghost points */ 2149 nctot = n; 2150 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2151 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2152 } else { 2153 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2154 } 2155 2156 /* 2157 Mark all rows affect by these columns 2158 */ 2159 /* Temporary option to allow for debugging/testing */ 2160 flg = PETSC_FALSE; 2161 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2162 if (!flg) {/*-----------------------------------------------------------------------------*/ 2163 /* crude, fast version */ 2164 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2165 /* loop over columns*/ 2166 for (j=0; j<nctot; j++) { 2167 if (ctype == IS_COLORING_GHOSTED) { 2168 col = ltog[cols[j]]; 2169 } else { 2170 col = cols[j]; 2171 } 2172 if (col >= cstart && col < cend) { 2173 /* column is in diagonal block of matrix */ 2174 rows = A_cj + A_ci[col-cstart]; 2175 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2176 } else { 2177 #if defined (PETSC_USE_CTABLE) 2178 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2179 colb --; 2180 #else 2181 colb = baij->colmap[col] - 1; 2182 #endif 2183 if (colb == -1) { 2184 m = 0; 2185 } else { 2186 colb = colb/bs; 2187 rows = B_cj + B_ci[colb]; 2188 m = B_ci[colb+1] - B_ci[colb]; 2189 } 2190 } 2191 /* loop over columns marking them in rowhit */ 2192 for (k=0; k<m; k++) { 2193 rowhit[*rows++] = col + 1; 2194 } 2195 } 2196 2197 /* count the number of hits */ 2198 nrows = 0; 2199 for (j=0; j<M; j++) { 2200 if (rowhit[j]) nrows++; 2201 } 2202 c->nrows[i] = nrows; 2203 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2204 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2205 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2206 nrows = 0; 2207 for (j=0; j<M; j++) { 2208 if (rowhit[j]) { 2209 c->rows[i][nrows] = j; 2210 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2211 nrows++; 2212 } 2213 } 2214 } else {/*-------------------------------------------------------------------------------*/ 2215 /* slow version, using rowhit as a linked list */ 2216 PetscInt currentcol,fm,mfm; 2217 rowhit[M] = M; 2218 nrows = 0; 2219 /* loop over columns*/ 2220 for (j=0; j<nctot; j++) { 2221 if (ctype == IS_COLORING_GHOSTED) { 2222 col = ltog[cols[j]]; 2223 } else { 2224 col = cols[j]; 2225 } 2226 if (col >= cstart && col < cend) { 2227 /* column is in diagonal block of matrix */ 2228 rows = A_cj + A_ci[col-cstart]; 2229 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2230 } else { 2231 #if defined (PETSC_USE_CTABLE) 2232 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2233 colb --; 2234 #else 2235 colb = baij->colmap[col] - 1; 2236 #endif 2237 if (colb == -1) { 2238 m = 0; 2239 } else { 2240 colb = colb/bs; 2241 rows = B_cj + B_ci[colb]; 2242 m = B_ci[colb+1] - B_ci[colb]; 2243 } 2244 } 2245 2246 /* loop over columns marking them in rowhit */ 2247 fm = M; /* fm points to first entry in linked list */ 2248 for (k=0; k<m; k++) { 2249 currentcol = *rows++; 2250 /* is it already in the list? 
*/ 2251 do { 2252 mfm = fm; 2253 fm = rowhit[fm]; 2254 } while (fm < currentcol); 2255 /* not in list so add it */ 2256 if (fm != currentcol) { 2257 nrows++; 2258 columnsforrow[currentcol] = col; 2259 /* next three lines insert new entry into linked list */ 2260 rowhit[mfm] = currentcol; 2261 rowhit[currentcol] = fm; 2262 fm = currentcol; 2263 /* fm points to present position in list since we know the columns are sorted */ 2264 } else { 2265 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2266 } 2267 } 2268 } 2269 c->nrows[i] = nrows; 2270 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2271 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2272 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2273 /* now store the linked list of rows into c->rows[i] */ 2274 nrows = 0; 2275 fm = rowhit[M]; 2276 do { 2277 c->rows[i][nrows] = fm; 2278 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2279 fm = rowhit[fm]; 2280 } while (fm < M); 2281 } /* ---------------------------------------------------------------------------------------*/ 2282 ierr = PetscFree(cols);CHKERRQ(ierr); 2283 } 2284 2285 /* Optimize by adding the vscale and vscaleforrow[][] fields */ 2286 /* 2287 vscale will contain the on-process ("diagonal") scalings followed by the off-process ones 2288 */ 2289 if (ctype == IS_COLORING_GLOBAL) { 2290 PetscInt *garray; 2291 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2292 for (i=0; i<baij->B->cmap->n/bs; i++) { 2293 for (j=0; j<bs; j++) { 2294 garray[i*bs+j] = bs*baij->garray[i]+j; 2295 } 2296 } 2297 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2298 ierr = PetscFree(garray);CHKERRQ(ierr); 2299 CHKMEMQ; 2300 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2301 for (k=0; k<c->ncolors; k++) { 2302 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2303 for (l=0; l<c->nrows[k]; l++) { 2304 col = c->columnsforrow[k][l]; 2305 if (col >= cstart && col < cend) { 2306 /* column is in diagonal block of matrix */ 2307 colb = col - cstart; 2308 } else { 2309 /* column is in "off-processor" part */ 2310 #if defined (PETSC_USE_CTABLE) 2311 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2312 colb --; 2313 #else 2314 colb = baij->colmap[col] - 1; 2315 #endif 2316 colb = colb/bs; 2317 colb += cend - cstart; 2318 } 2319 c->vscaleforrow[k][l] = colb; 2320 } 2321 } 2322 } else if (ctype == IS_COLORING_GHOSTED) { 2323 /* Get gtol mapping */ 2324 PetscInt N = mat->cmap->N, *gtol; 2325 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2326 for (i=0; i<N; i++) gtol[i] = -1; 2327 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2328 2329 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2330 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2331 for (k=0; k<c->ncolors; k++) { 2332 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2333 for (l=0; l<c->nrows[k]; l++) { 2334 col = c->columnsforrow[k][l]; /* global column index */ 2335 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2336 } 2337 } 2338 ierr = PetscFree(gtol);CHKERRQ(ierr); 2339 } 2340 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2341 2342 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2343 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2344 
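/* done with the compressed column views of the diagonal and off-diagonal blocks obtained with MatGetColumnIJ() above */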
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2345 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2346 CHKMEMQ; 2347 PetscFunctionReturn(0); 2348 } 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2352 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2353 { 2354 Mat B; 2355 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2356 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2357 Mat_SeqAIJ *b; 2358 PetscErrorCode ierr; 2359 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2360 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2361 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2362 2363 PetscFunctionBegin; 2364 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2365 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2366 2367 /* ---------------------------------------------------------------- 2368 Tell every processor the number of nonzeros per row 2369 */ 2370 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2371 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2372 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2373 } 2374 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2375 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2376 displs = recvcounts + size; 2377 for (i=0; i<size; i++) { 2378 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2379 displs[i] = A->rmap->range[i]/bs; 2380 } 2381 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2382 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2383 #else 2384 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2385 #endif 2386 /* --------------------------------------------------------------- 2387 Create the sequential matrix of the same type as the local block diagonal 2388 */ 2389 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2390 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2391 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2392 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2393 b = (Mat_SeqAIJ *)B->data; 2394 2395 /*-------------------------------------------------------------------- 2396 Copy my part of matrix column indices over 2397 */ 2398 sendcount = ad->nz + bd->nz; 2399 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2400 a_jsendbuf = ad->j; 2401 b_jsendbuf = bd->j; 2402 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2403 cnt = 0; 2404 for (i=0; i<n; i++) { 2405 2406 /* put in lower diagonal portion */ 2407 m = bd->i[i+1] - bd->i[i]; 2408 while (m > 0) { 2409 /* is it above diagonal (in bd (compressed) numbering) */ 2410 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2411 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2412 m--; 2413 } 2414 2415 /* put in diagonal portion */ 2416 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2417 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2418 } 2419 2420 /* put in upper diagonal portion */ 2421 while (m-- > 0) { 2422 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2423 } 2424 } 2425 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2426 2427 /*-------------------------------------------------------------------- 2428 Gather all column indices to all processors 2429 */ 2430 for (i=0; i<size; i++) { 2431 recvcounts[i] = 0; 2432 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2433 recvcounts[i] += lens[j]; 2434 } 2435 } 2436 displs[0] = 0; 2437 for (i=1; i<size; i++) { 2438 displs[i] = displs[i-1] + recvcounts[i-1]; 2439 } 2440 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2441 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2442 #else 2443 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2444 #endif 2445 /*-------------------------------------------------------------------- 2446 Assemble the matrix into useable form (note numerical values not yet set) 2447 */ 2448 /* set the b->ilen (length of each row) values */ 2449 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2450 /* set the b->i indices */ 2451 b->i[0] = 0; 2452 for (i=1; i<=A->rmap->N/bs; i++) { 2453 b->i[i] = b->i[i-1] + lens[i-1]; 2454 } 2455 ierr = PetscFree(lens);CHKERRQ(ierr); 2456 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2457 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2458 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2459 2460 if (A->symmetric){ 2461 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2462 } else if (A->hermitian) { 2463 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2464 } else if (A->structurally_symmetric) { 2465 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2466 } 2467 *newmat = B; 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatSOR_MPIBAIJ" 2473 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2474 { 2475 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2476 PetscErrorCode ierr; 2477 Vec bb1 = 0; 2478 2479 PetscFunctionBegin; 2480 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2481 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2482 } 2483 2484 if (flag == SOR_APPLY_UPPER) { 2485 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2486 PetscFunctionReturn(0); 2487 } 2488 2489 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2490 if (flag & SOR_ZERO_INITIAL_GUESS) { 2491 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2492 its--; 2493 } 2494 2495 while (its--) { 2496 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2497 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2498 2499 /* update rhs: bb1 = bb - B*x */ 2500 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2501 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2502 2503 /* local sweep */ 2504 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2505 } 2506 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2507 if (flag & SOR_ZERO_INITIAL_GUESS) { 2508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2509 its--; 2510 } 2511 while (its--) { 2512 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2513 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2514 2515 /* update rhs: bb1 = bb - B*x */ 2516 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2517 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2518 2519 /* local sweep */ 2520 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2521 } 2522 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2523 if (flag & SOR_ZERO_INITIAL_GUESS) { 2524 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2525 its--; 2526 } 2527 while (its--) { 2528 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2529 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2530 2531 /* update rhs: bb1 = bb - B*x */ 2532 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2533 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2534 2535 /* local sweep */ 2536 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2537 } 2538 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2539 2540 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2541 PetscFunctionReturn(0); 2542 } 2543 2544 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2545 2546 2547 /* -------------------------------------------------------------------*/ 2548 static struct _MatOps MatOps_Values = { 2549 MatSetValues_MPIBAIJ, 2550 MatGetRow_MPIBAIJ, 2551 MatRestoreRow_MPIBAIJ, 2552 MatMult_MPIBAIJ, 2553 /* 4*/ MatMultAdd_MPIBAIJ, 2554 MatMultTranspose_MPIBAIJ, 2555 MatMultTransposeAdd_MPIBAIJ, 2556 0, 2557 0, 2558 0, 2559 /*10*/ 0, 2560 0, 2561 0, 2562 MatSOR_MPIBAIJ, 2563 MatTranspose_MPIBAIJ, 2564 /*15*/ MatGetInfo_MPIBAIJ, 2565 MatEqual_MPIBAIJ, 2566 MatGetDiagonal_MPIBAIJ, 2567 MatDiagonalScale_MPIBAIJ, 2568 MatNorm_MPIBAIJ, 2569 /*20*/ MatAssemblyBegin_MPIBAIJ, 2570 MatAssemblyEnd_MPIBAIJ, 2571 MatSetOption_MPIBAIJ, 2572 MatZeroEntries_MPIBAIJ, 2573 /*24*/ MatZeroRows_MPIBAIJ, 2574 0, 2575 0, 2576 0, 2577 0, 2578 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2579 0, 2580 0, 2581 0, 2582 0, 2583 /*34*/ MatDuplicate_MPIBAIJ, 2584 0, 2585 0, 2586 0, 2587 0, 2588 /*39*/ MatAXPY_MPIBAIJ, 2589 MatGetSubMatrices_MPIBAIJ, 2590 MatIncreaseOverlap_MPIBAIJ, 2591 MatGetValues_MPIBAIJ, 2592 MatCopy_MPIBAIJ, 2593 /*44*/ 0, 2594 MatScale_MPIBAIJ, 2595 0, 2596 0, 2597 0, 2598 /*49*/ MatSetBlockSize_MPIBAIJ, 2599 0, 2600 0, 2601 0, 2602 0, 2603 /*54*/ MatFDColoringCreate_MPIBAIJ, 2604 0, 2605 MatSetUnfactored_MPIBAIJ, 2606 MatPermute_MPIBAIJ, 2607 MatSetValuesBlocked_MPIBAIJ, 2608 /*59*/ MatGetSubMatrix_MPIBAIJ, 2609 MatDestroy_MPIBAIJ, 2610 MatView_MPIBAIJ, 2611 0, 2612 0, 2613 /*64*/ 0, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2619 0, 2620 0, 2621 0, 2622 0, 2623 /*74*/ 0, 2624 MatFDColoringApply_BAIJ, 2625 0, 2626 0, 2627 0, 2628 /*79*/ 0, 2629 0, 2630 0, 2631 0, 2632 MatLoad_MPIBAIJ, 2633 /*84*/ 0, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /*89*/ 0, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*94*/ 0, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*99*/ 0, 2649 0, 2650 0, 2651 0, 2652 0, 2653 /*104*/0, 2654 MatRealPart_MPIBAIJ, 2655 MatImaginaryPart_MPIBAIJ, 2656 0, 2657 0, 2658 /*109*/0, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2664 0, 2665 MatGetGhosts_MPIBAIJ, 2666 0, 2667 0, 2668 /*119*/0, 2669 0, 2670 0, 2671 0 
2672 }; 2673 2674 EXTERN_C_BEGIN 2675 #undef __FUNCT__ 2676 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2677 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2678 { 2679 PetscFunctionBegin; 2680 *a = ((Mat_MPIBAIJ *)A->data)->A; 2681 *iscopy = PETSC_FALSE; 2682 PetscFunctionReturn(0); 2683 } 2684 EXTERN_C_END 2685 2686 EXTERN_C_BEGIN 2687 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2688 EXTERN_C_END 2689 2690 EXTERN_C_BEGIN 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2693 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2694 { 2695 PetscInt m,rstart,cstart,cend; 2696 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2697 const PetscInt *JJ=0; 2698 PetscScalar *values=0; 2699 PetscErrorCode ierr; 2700 2701 PetscFunctionBegin; 2702 2703 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2704 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2705 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2706 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2707 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2708 m = B->rmap->n/bs; 2709 rstart = B->rmap->rstart/bs; 2710 cstart = B->cmap->rstart/bs; 2711 cend = B->cmap->rend/bs; 2712 2713 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2714 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2715 for (i=0; i<m; i++) { 2716 nz = ii[i+1] - ii[i]; 2717 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2718 nz_max = PetscMax(nz_max,nz); 2719 JJ = jj + ii[i]; 2720 for (j=0; j<nz; j++) { 2721 if (*JJ >= cstart) break; 2722 JJ++; 2723 } 2724 d = 0; 2725 for (; j<nz; j++) { 2726 if (*JJ++ >= cend) break; 2727 d++; 2728 } 2729 d_nnz[i] = d; 2730 o_nnz[i] = nz - d; 2731 } 2732 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2733 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2734 2735 values = (PetscScalar*)V; 2736 if (!values) { 2737 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2738 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2739 } 2740 for (i=0; i<m; i++) { 2741 PetscInt row = i + rstart; 2742 PetscInt ncols = ii[i+1] - ii[i]; 2743 const PetscInt *icols = jj + ii[i]; 2744 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2745 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2746 } 2747 2748 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2749 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2750 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2751 2752 PetscFunctionReturn(0); 2753 } 2754 EXTERN_C_END 2755 2756 #undef __FUNCT__ 2757 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2758 /*@C 2759 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2760 (the default parallel PETSc format). 2761 2762 Collective on MPI_Comm 2763 2764 Input Parameters: 2765 + A - the matrix 2766 . i - the indices into j for the start of each local row (starts with zero) 2767 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2768 - v - optional values in the matrix 2769 2770 Level: developer 2771 2772 .keywords: matrix, aij, compressed row, sparse, parallel 2773 2774 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2775 @*/ 2776 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2777 { 2778 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2779 2780 PetscFunctionBegin; 2781 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2782 if (f) { 2783 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2784 } 2785 PetscFunctionReturn(0); 2786 } 2787 2788 EXTERN_C_BEGIN 2789 #undef __FUNCT__ 2790 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2791 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2792 { 2793 Mat_MPIBAIJ *b; 2794 PetscErrorCode ierr; 2795 PetscInt i, newbs = PetscAbs(bs); 2796 2797 PetscFunctionBegin; 2798 if (bs < 0) { 2799 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2800 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2801 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2802 bs = PetscAbs(bs); 2803 } 2804 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2805 bs = newbs; 2806 2807 2808 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2809 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2810 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2811 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2812 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2813 2814 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2815 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2816 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2817 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2818 2819 if (d_nnz) { 2820 for (i=0; i<B->rmap->n/bs; i++) { 2821 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2822 } 2823 } 2824 if (o_nnz) { 2825 for (i=0; i<B->rmap->n/bs; i++) { 2826 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2827 } 2828 } 2829 2830 b = (Mat_MPIBAIJ*)B->data; 2831 b->bs2 = bs*bs; 2832 b->mbs = B->rmap->n/bs; 2833 b->nbs = B->cmap->n/bs; 2834 b->Mbs = B->rmap->N/bs; 2835 b->Nbs = B->cmap->N/bs; 2836 2837 for (i=0; i<=b->size; i++) { 2838 b->rangebs[i] = B->rmap->range[i]/bs; 2839 } 2840 b->rstartbs = B->rmap->rstart/bs; 2841 b->rendbs = B->rmap->rend/bs; 2842 b->cstartbs = B->cmap->rstart/bs; 2843 b->cendbs = B->cmap->rend/bs; 2844 2845 if (!B->preallocated) { 2846 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2847 ierr = 
MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2848 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2849 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2850 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2851 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2852 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2854 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2855 } 2856 2857 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2858 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2859 B->preallocated = PETSC_TRUE; 2860 PetscFunctionReturn(0); 2861 } 2862 EXTERN_C_END 2863 2864 EXTERN_C_BEGIN 2865 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2866 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2867 EXTERN_C_END 2868 2869 2870 EXTERN_C_BEGIN 2871 #undef __FUNCT__ 2872 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2873 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2874 { 2875 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2876 PetscErrorCode ierr; 2877 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2878 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2879 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2880 2881 PetscFunctionBegin; 2882 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2883 ii[0] = 0; 2884 CHKMEMQ; 2885 for (i=0; i<M; i++) { 2886 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2887 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2888 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2889 /* remove one from count of matrix has diagonal */ 2890 for (j=id[i]; j<id[i+1]; j++) { 2891 if (jd[j] == i) {ii[i+1]--;break;} 2892 } 2893 CHKMEMQ; 2894 } 2895 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2896 cnt = 0; 2897 for (i=0; i<M; i++) { 2898 for (j=io[i]; j<io[i+1]; j++) { 2899 if (garray[jo[j]] > rstart) break; 2900 jj[cnt++] = garray[jo[j]]; 2901 CHKMEMQ; 2902 } 2903 for (k=id[i]; k<id[i+1]; k++) { 2904 if (jd[k] != i) { 2905 jj[cnt++] = rstart + jd[k]; 2906 CHKMEMQ; 2907 } 2908 } 2909 for (;j<io[i+1]; j++) { 2910 jj[cnt++] = garray[jo[j]]; 2911 CHKMEMQ; 2912 } 2913 } 2914 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2915 PetscFunctionReturn(0); 2916 } 2917 EXTERN_C_END 2918 2919 EXTERN_C_BEGIN 2920 #if defined(PETSC_HAVE_MUMPS) 2921 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2922 #endif 2923 EXTERN_C_END 2924 2925 /*MC 2926 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2927 2928 Options Database Keys: 2929 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2930 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2931 - -mat_use_hash_table <fact> 2932 2933 Level: beginner 2934 2935 .seealso: MatCreateMPIBAIJ 2936 M*/ 2937 2938 EXTERN_C_BEGIN 2939 #undef __FUNCT__ 2940 #define __FUNCT__ "MatCreate_MPIBAIJ" 2941 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2942 { 2943 Mat_MPIBAIJ *b; 2944 PetscErrorCode ierr; 2945 PetscTruth flg; 2946 2947 PetscFunctionBegin; 2948 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2949 B->data = (void*)b; 2950 2951 2952 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2953 B->mapping = 0; 2954 B->assembled = PETSC_FALSE; 2955 2956 B->insertmode = NOT_SET_VALUES; 2957 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2958 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2959 2960 /* build local table of row and column ownerships */ 2961 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2962 2963 /* build cache for off array entries formed */ 2964 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2965 b->donotstash = PETSC_FALSE; 2966 b->colmap = PETSC_NULL; 2967 b->garray = PETSC_NULL; 2968 b->roworiented = PETSC_TRUE; 2969 2970 /* stuff used in block assembly */ 2971 b->barray = 0; 2972 2973 /* stuff used for matrix vector multiply */ 2974 b->lvec = 0; 2975 b->Mvctx = 0; 2976 2977 /* stuff for MatGetRow() */ 2978 b->rowindices = 0; 2979 b->rowvalues = 0; 2980 b->getrowactive = PETSC_FALSE; 2981 2982 /* hash table stuff */ 2983 b->ht = 0; 2984 b->hd = 0; 2985 b->ht_size = 0; 2986 b->ht_flag = PETSC_FALSE; 2987 b->ht_fact = 0; 2988 b->ht_total_ct = 0; 2989 b->ht_insert_ct = 0; 2990 2991 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2992 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2993 if (flg) { 2994 PetscReal fact = 1.39; 2995 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2996 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2997 if (fact <= 1.0) fact = 1.39; 2998 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2999 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3000 } 3001 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3002 3003 #if defined(PETSC_HAVE_MUMPS) 3004 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3005 #endif 3006 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3007 "MatConvert_MPIBAIJ_MPIAdj", 3008 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3009 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3010 "MatStoreValues_MPIBAIJ", 3011 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3013 "MatRetrieveValues_MPIBAIJ", 3014 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3015 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3016 "MatGetDiagonalBlock_MPIBAIJ", 3017 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3018 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3019 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3020 
MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3021 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3022 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3023 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3024 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3025 "MatDiagonalScaleLocal_MPIBAIJ", 3026 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3027 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3028 "MatSetHashTableFactor_MPIBAIJ", 3029 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3030 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3031 PetscFunctionReturn(0); 3032 } 3033 EXTERN_C_END 3034 3035 /*MC 3036 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3037 3038 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3039 and MATMPIBAIJ otherwise. 3040 3041 Options Database Keys: 3042 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3043 3044 Level: beginner 3045 3046 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3047 M*/ 3048 3049 EXTERN_C_BEGIN 3050 #undef __FUNCT__ 3051 #define __FUNCT__ "MatCreate_BAIJ" 3052 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3053 { 3054 PetscErrorCode ierr; 3055 PetscMPIInt size; 3056 3057 PetscFunctionBegin; 3058 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3059 if (size == 1) { 3060 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3061 } else { 3062 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3063 } 3064 PetscFunctionReturn(0); 3065 } 3066 EXTERN_C_END 3067 3068 #undef __FUNCT__ 3069 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3070 /*@C 3071 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3072 (block compressed row). For good matrix assembly performance 3073 the user should preallocate the matrix storage by setting the parameters 3074 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3075 performance can be increased by more than a factor of 50. 3076 3077 Collective on Mat 3078 3079 Input Parameters: 3080 + A - the matrix 3081 . bs - size of block 3082 . d_nz - number of block nonzeros per block row in diagonal portion of local 3083 submatrix (same for all local rows) 3084 . d_nnz - array containing the number of block nonzeros in the various block rows 3085 in the diagonal portion of the local submatrix (possibly different for each block 3086 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3087 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3088 submatrix (same for all local rows). 3089 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3090 off-diagonal portion of the local submatrix (possibly different for 3091 each block row) or PETSC_NULL. 3092 3093 If the *_nnz parameter is given then the *_nz parameter is ignored. 3094 3095 Options Database Keys: 3096 + -mat_block_size - size of the blocks to use 3097 - -mat_use_hash_table <fact> 3098 3099 Notes: 3100 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3101 then it must be used on all processors that share the object for that argument. 
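   An example of typical usage follows (a minimal sketch only; the communicator comm, the local sizes m and n, the block size bs, and the nonzero counts shown are illustrative placeholders):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,3,PETSC_NULL,2,PETSC_NULL);
.ve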
3102 3103 Storage Information: 3104 For a square global matrix we define each processor's diagonal portion 3105 to be its local rows and the corresponding columns (a square submatrix); 3106 each processor's off-diagonal portion encompasses the remainder of the 3107 local matrix (a rectangular submatrix). 3108 3109 The user can specify preallocated storage for the diagonal part of 3110 the local submatrix with either d_nz or d_nnz (not both). Set 3111 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3112 memory allocation. Likewise, specify preallocated storage for the 3113 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3114 3115 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3116 the figure below we depict these three local rows and all columns (0-11). 3117 3118 .vb 3119 0 1 2 3 4 5 6 7 8 9 10 11 3120 ------------------- 3121 row 3 | o o o d d d o o o o o o 3122 row 4 | o o o d d d o o o o o o 3123 row 5 | o o o d d d o o o o o o 3124 ------------------- 3125 .ve 3126 3127 Thus, any entries in the d locations are stored in the d (diagonal) 3128 submatrix, and any entries in the o locations are stored in the 3129 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3130 stored simply in the MATSEQBAIJ format for compressed row storage. 3131 3132 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3133 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3134 In general, for PDE problems in which most nonzeros are near the diagonal, 3135 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3136 or you will get TERRIBLE performance; see the users' manual chapter on 3137 matrices. 3138 3139 You can call MatGetInfo() to get information on how effective the preallocation was; 3140 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3141 You can also run with the option -info and look for messages with the string 3142 malloc in them to see if additional memory allocation was needed. 3143 3144 Level: intermediate 3145 3146 .keywords: matrix, block, aij, compressed row, sparse, parallel 3147 3148 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3149 @*/ 3150 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3151 { 3152 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3153 3154 PetscFunctionBegin; 3155 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3156 if (f) { 3157 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3158 } 3159 PetscFunctionReturn(0); 3160 } 3161 3162 #undef __FUNCT__ 3163 #define __FUNCT__ "MatCreateMPIBAIJ" 3164 /*@C 3165 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3166 (block compressed row). For good matrix assembly performance 3167 the user should preallocate the matrix storage by setting the parameters 3168 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3169 performance can be increased by more than a factor of 50. 3170 3171 Collective on MPI_Comm 3172 3173 Input Parameters: 3174 + comm - MPI communicator 3175 . bs - size of blockk 3176 . 
m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3177            This value should be the same as the local size used in creating the
3178            y vector for the matrix-vector product y = Ax.
3179 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
3180            This value should be the same as the local size used in creating the
3181            x vector for the matrix-vector product y = Ax.
3182 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3183 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3184 .  d_nz - number of nonzero blocks per block row in diagonal portion of local
3185            submatrix (same for all local rows)
3186 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3187            in the diagonal portion of the local submatrix (possibly different for each block
3188            row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
3189 .  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3190            submatrix (same for all local rows).
3191 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3192            off-diagonal portion of the local submatrix (possibly different for
3193            each block row) or PETSC_NULL.
3194
3195    Output Parameter:
3196 .  A - the matrix
3197
3198    Options Database Keys:
3199 +   -mat_block_size - size of the blocks to use
3200 -   -mat_use_hash_table <fact>
3201
3202    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3203    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3204    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3205
3206    Notes:
3207    If the *_nnz parameter is given then the *_nz parameter is ignored
3208
3209    A nonzero block is any block that has 1 or more nonzeros in it
3210
3211    The user MUST specify either the local or global matrix dimensions
3212    (possibly both).
3213
3214    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3215    then it must be used on all processors that share the object for that argument.
3216
3217    Storage Information:
3218    For a square global matrix we define each processor's diagonal portion
3219    to be its local rows and the corresponding columns (a square submatrix);
3220    each processor's off-diagonal portion encompasses the remainder of the
3221    local matrix (a rectangular submatrix).
3222
3223    The user can specify preallocated storage for the diagonal part of
3224    the local submatrix with either d_nz or d_nnz (not both). Set
3225    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3226    memory allocation. Likewise, specify preallocated storage for the
3227    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3228
3229    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3230    the figure below we depict these three local rows and all columns (0-11).
3231
3232 .vb
3233            0 1 2 3 4 5 6 7 8 9 10 11
3234           -------------------
3235    row 3  |  o o o d d d o o o o o o
3236    row 4  |  o o o d d d o o o o o o
3237    row 5  |  o o o d d d o o o o o o
3238           -------------------
3239 .ve
3240
3241    Thus, any entries in the d locations are stored in the d (diagonal)
3242    submatrix, and any entries in the o locations are stored in the
3243    o (off-diagonal) submatrix. Note that the d and the o submatrices are
3244    stored simply in the MATSEQBAIJ format for compressed row storage.
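
   For instance (purely illustrative, taking bs=3 and assuming every entry drawn
   above is nonzero): rows 3-5 form a single block row with four block columns,
   and only block column 1 (columns 3-5) falls in the diagonal portion, so

.vb
   block columns:            0   1   2   3
   block row 1 (rows 3-5):   o   d   o   o      ->   d_nz = 1, o_nz = 3
.ve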
3245 3246 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3247 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3248 In general, for PDE problems in which most nonzeros are near the diagonal, 3249 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3250 or you will get TERRIBLE performance; see the users' manual chapter on 3251 matrices. 3252 3253 Level: intermediate 3254 3255 .keywords: matrix, block, aij, compressed row, sparse, parallel 3256 3257 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3258 @*/ 3259 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3260 { 3261 PetscErrorCode ierr; 3262 PetscMPIInt size; 3263 3264 PetscFunctionBegin; 3265 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3266 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3267 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3268 if (size > 1) { 3269 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3270 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3271 } else { 3272 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3273 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3274 } 3275 PetscFunctionReturn(0); 3276 } 3277 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3280 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3281 { 3282 Mat mat; 3283 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3284 PetscErrorCode ierr; 3285 PetscInt len=0; 3286 3287 PetscFunctionBegin; 3288 *newmat = 0; 3289 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3290 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3291 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3292 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3293 3294 mat->factortype = matin->factortype; 3295 mat->preallocated = PETSC_TRUE; 3296 mat->assembled = PETSC_TRUE; 3297 mat->insertmode = NOT_SET_VALUES; 3298 3299 a = (Mat_MPIBAIJ*)mat->data; 3300 mat->rmap->bs = matin->rmap->bs; 3301 a->bs2 = oldmat->bs2; 3302 a->mbs = oldmat->mbs; 3303 a->nbs = oldmat->nbs; 3304 a->Mbs = oldmat->Mbs; 3305 a->Nbs = oldmat->Nbs; 3306 3307 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3308 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3309 3310 a->size = oldmat->size; 3311 a->rank = oldmat->rank; 3312 a->donotstash = oldmat->donotstash; 3313 a->roworiented = oldmat->roworiented; 3314 a->rowindices = 0; 3315 a->rowvalues = 0; 3316 a->getrowactive = PETSC_FALSE; 3317 a->barray = 0; 3318 a->rstartbs = oldmat->rstartbs; 3319 a->rendbs = oldmat->rendbs; 3320 a->cstartbs = oldmat->cstartbs; 3321 a->cendbs = oldmat->cendbs; 3322 3323 /* hash table stuff */ 3324 a->ht = 0; 3325 a->hd = 0; 3326 a->ht_size = 0; 3327 a->ht_flag = oldmat->ht_flag; 3328 a->ht_fact = oldmat->ht_fact; 3329 a->ht_total_ct = 0; 3330 a->ht_insert_ct = 0; 3331 3332 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3333 if (oldmat->colmap) { 3334 #if defined (PETSC_USE_CTABLE) 3335 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3336 #else 3337 ierr = 
PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3338 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3339 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3340 #endif 3341 } else a->colmap = 0; 3342 3343 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3344 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3345 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3346 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3347 } else a->garray = 0; 3348 3349 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3350 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3351 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3352 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3353 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3354 3355 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3356 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3357 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3358 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3359 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3360 *newmat = mat; 3361 3362 PetscFunctionReturn(0); 3363 } 3364 3365 #undef __FUNCT__ 3366 #define __FUNCT__ "MatLoad_MPIBAIJ" 3367 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3368 { 3369 PetscErrorCode ierr; 3370 int fd; 3371 PetscInt i,nz,j,rstart,rend; 3372 PetscScalar *vals,*buf; 3373 MPI_Comm comm = ((PetscObject)viewer)->comm; 3374 MPI_Status status; 3375 PetscMPIInt rank,size,maxnz; 3376 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3377 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3378 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3379 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3380 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3381 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3382 3383 PetscFunctionBegin; 3384 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3385 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3386 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3387 3388 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3389 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3390 if (!rank) { 3391 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3392 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3393 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3394 } 3395 3396 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3397 3398 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3399 M = header[1]; N = header[2]; 3400 3401 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3402 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3403 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3404 3405 /* If global sizes are set, check if they are consistent with that given in the file */ 3406 if (sizesset) { 3407 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 
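    /* grows/gcols record the global sizes already set on newmat; consistency with the
       dimensions read from the file header is checked just below */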
3408 } 3409 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3410 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3411 3412 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3413 3414 /* 3415 This code adds extra rows to make sure the number of rows is 3416 divisible by the blocksize 3417 */ 3418 Mbs = M/bs; 3419 extra_rows = bs - M + bs*Mbs; 3420 if (extra_rows == bs) extra_rows = 0; 3421 else Mbs++; 3422 if (extra_rows && !rank) { 3423 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3424 } 3425 3426 /* determine ownership of all rows */ 3427 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3428 mbs = Mbs/size + ((Mbs % size) > rank); 3429 m = mbs*bs; 3430 } else { /* User set */ 3431 m = newmat->rmap->n; 3432 mbs = m/bs; 3433 } 3434 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3435 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3436 3437 /* process 0 needs enough room for process with most rows */ 3438 if (!rank) { 3439 mmax = rowners[1]; 3440 for (i=2; i<size; i++) { 3441 mmax = PetscMax(mmax,rowners[i]); 3442 } 3443 mmax*=bs; 3444 } else mmax = m; 3445 3446 rowners[0] = 0; 3447 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3448 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3449 rstart = rowners[rank]; 3450 rend = rowners[rank+1]; 3451 3452 /* distribute row lengths to all processors */ 3453 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3454 if (!rank) { 3455 mend = m; 3456 if (size == 1) mend = mend - extra_rows; 3457 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3458 for (j=mend; j<m; j++) locrowlens[j] = 1; 3459 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3460 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3461 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3462 for (j=0; j<m; j++) { 3463 procsnz[0] += locrowlens[j]; 3464 } 3465 for (i=1; i<size; i++) { 3466 mend = browners[i+1] - browners[i]; 3467 if (i == size-1) mend = mend - extra_rows; 3468 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3469 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3470 /* calculate the number of nonzeros on each processor */ 3471 for (j=0; j<browners[i+1]-browners[i]; j++) { 3472 procsnz[i] += rowlengths[j]; 3473 } 3474 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3475 } 3476 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3477 } else { 3478 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3479 } 3480 3481 if (!rank) { 3482 /* determine max buffer needed and allocate it */ 3483 maxnz = procsnz[0]; 3484 for (i=1; i<size; i++) { 3485 maxnz = PetscMax(maxnz,procsnz[i]); 3486 } 3487 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3488 3489 /* read in my part of the matrix column indices */ 3490 nz = procsnz[0]; 3491 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3492 mycols = ibuf; 3493 if (size == 1) nz -= extra_rows; 3494 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3495 if (size == 1) for (i=0; i< extra_rows; i++) { 
mycols[nz+i] = M+i; } 3496 3497 /* read in every ones (except the last) and ship off */ 3498 for (i=1; i<size-1; i++) { 3499 nz = procsnz[i]; 3500 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3501 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3502 } 3503 /* read in the stuff for the last proc */ 3504 if (size != 1) { 3505 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3506 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3507 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3508 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3509 } 3510 ierr = PetscFree(cols);CHKERRQ(ierr); 3511 } else { 3512 /* determine buffer space needed for message */ 3513 nz = 0; 3514 for (i=0; i<m; i++) { 3515 nz += locrowlens[i]; 3516 } 3517 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3518 mycols = ibuf; 3519 /* receive message of column indices*/ 3520 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3521 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3522 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3523 } 3524 3525 /* loop over local rows, determining number of off diagonal entries */ 3526 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3527 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3528 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3529 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3530 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3531 rowcount = 0; nzcount = 0; 3532 for (i=0; i<mbs; i++) { 3533 dcount = 0; 3534 odcount = 0; 3535 for (j=0; j<bs; j++) { 3536 kmax = locrowlens[rowcount]; 3537 for (k=0; k<kmax; k++) { 3538 tmp = mycols[nzcount++]/bs; 3539 if (!mask[tmp]) { 3540 mask[tmp] = 1; 3541 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3542 else masked1[dcount++] = tmp; 3543 } 3544 } 3545 rowcount++; 3546 } 3547 3548 dlens[i] = dcount; 3549 odlens[i] = odcount; 3550 3551 /* zero out the mask elements we set */ 3552 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3553 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3554 } 3555 3556 3557 if (!sizesset) { 3558 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3559 } 3560 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3561 3562 if (!rank) { 3563 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3564 /* read in my part of the matrix numerical values */ 3565 nz = procsnz[0]; 3566 vals = buf; 3567 mycols = ibuf; 3568 if (size == 1) nz -= extra_rows; 3569 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3570 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3571 3572 /* insert into matrix */ 3573 jj = rstart*bs; 3574 for (i=0; i<m; i++) { 3575 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3576 mycols += locrowlens[i]; 3577 vals += locrowlens[i]; 3578 jj++; 3579 } 3580 /* read in other processors (except the last one) and ship out */ 3581 for (i=1; i<size-1; i++) { 3582 nz = procsnz[i]; 3583 vals = buf; 3584 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3585 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3586 } 3587 /* the last proc */ 3588 if (size != 1){ 3589 nz = procsnz[i] - extra_rows; 
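      /* after the loop above i == size-1, so procsnz[i] is the count for the last process;
         the padded rows are not stored in the file and receive the unit values added below */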
3590 vals = buf; 3591 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3592 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3593 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3594 } 3595 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3596 } else { 3597 /* receive numeric values */ 3598 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3599 3600 /* receive message of values*/ 3601 vals = buf; 3602 mycols = ibuf; 3603 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3604 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3605 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3606 3607 /* insert into matrix */ 3608 jj = rstart*bs; 3609 for (i=0; i<m; i++) { 3610 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3611 mycols += locrowlens[i]; 3612 vals += locrowlens[i]; 3613 jj++; 3614 } 3615 } 3616 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3617 ierr = PetscFree(buf);CHKERRQ(ierr); 3618 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3619 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3620 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3621 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3622 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3623 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3624 3625 PetscFunctionReturn(0); 3626 } 3627 3628 #undef __FUNCT__ 3629 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3630 /*@ 3631 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3632 3633 Input Parameters: 3634 . mat - the matrix 3635 . fact - factor 3636 3637 Not Collective, each process can use a different factor 3638 3639 Level: advanced 3640 3641 Notes: 3642 This can also be set by the command line option: -mat_use_hash_table <fact> 3643 3644 .keywords: matrix, hashtable, factor, HT 3645 3646 .seealso: MatSetOption() 3647 @*/ 3648 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3649 { 3650 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3651 3652 PetscFunctionBegin; 3653 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3654 if (f) { 3655 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3656 } 3657 PetscFunctionReturn(0); 3658 } 3659 3660 EXTERN_C_BEGIN 3661 #undef __FUNCT__ 3662 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3663 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3664 { 3665 Mat_MPIBAIJ *baij; 3666 3667 PetscFunctionBegin; 3668 baij = (Mat_MPIBAIJ*)mat->data; 3669 baij->ht_fact = fact; 3670 PetscFunctionReturn(0); 3671 } 3672 EXTERN_C_END 3673 3674 #undef __FUNCT__ 3675 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3676 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3677 { 3678 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3679 PetscFunctionBegin; 3680 *Ad = a->A; 3681 *Ao = a->B; 3682 *colmap = a->garray; 3683 PetscFunctionReturn(0); 3684 } 3685 3686 /* 3687 Special version for direct calls from Fortran (to eliminate two function call overheads 3688 */ 3689 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3690 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3691 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3692 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3693 #endif 3694 3695 #undef 
__FUNCT__ 3696 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3697 /*@C 3698 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3699 3700 Collective on Mat 3701 3702 Input Parameters: 3703 + mat - the matrix 3704 . min - number of input rows 3705 . im - input rows 3706 . nin - number of input columns 3707 . in - input columns 3708 . v - numerical values input 3709 - addvin - INSERT_VALUES or ADD_VALUES 3710 3711 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3712 3713 Level: advanced 3714 3715 .seealso: MatSetValuesBlocked() 3716 @*/ 3717 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3718 { 3719 /* convert input arguments to C version */ 3720 Mat mat = *matin; 3721 PetscInt m = *min, n = *nin; 3722 InsertMode addv = *addvin; 3723 3724 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3725 const MatScalar *value; 3726 MatScalar *barray=baij->barray; 3727 PetscTruth roworiented = baij->roworiented; 3728 PetscErrorCode ierr; 3729 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3730 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3731 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3732 3733 PetscFunctionBegin; 3734 /* tasks normally handled by MatSetValuesBlocked() */ 3735 if (mat->insertmode == NOT_SET_VALUES) { 3736 mat->insertmode = addv; 3737 } 3738 #if defined(PETSC_USE_DEBUG) 3739 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3740 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3741 #endif 3742 if (mat->assembled) { 3743 mat->was_assembled = PETSC_TRUE; 3744 mat->assembled = PETSC_FALSE; 3745 } 3746 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3747 3748 3749 if(!barray) { 3750 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3751 baij->barray = barray; 3752 } 3753 3754 if (roworiented) { 3755 stepval = (n-1)*bs; 3756 } else { 3757 stepval = (m-1)*bs; 3758 } 3759 for (i=0; i<m; i++) { 3760 if (im[i] < 0) continue; 3761 #if defined(PETSC_USE_DEBUG) 3762 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3763 #endif 3764 if (im[i] >= rstart && im[i] < rend) { 3765 row = im[i] - rstart; 3766 for (j=0; j<n; j++) { 3767 /* If NumCol = 1 then a copy is not required */ 3768 if ((roworiented) && (n == 1)) { 3769 barray = (MatScalar*)v + i*bs2; 3770 } else if((!roworiented) && (m == 1)) { 3771 barray = (MatScalar*)v + j*bs2; 3772 } else { /* Here a copy is required */ 3773 if (roworiented) { 3774 value = v + i*(stepval+bs)*bs + j*bs; 3775 } else { 3776 value = v + j*(stepval+bs)*bs + i*bs; 3777 } 3778 for (ii=0; ii<bs; ii++,value+=stepval) { 3779 for (jj=0; jj<bs; jj++) { 3780 *barray++ = *value++; 3781 } 3782 } 3783 barray -=bs2; 3784 } 3785 3786 if (in[j] >= cstart && in[j] < cend){ 3787 col = in[j] - cstart; 3788 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3789 } 3790 else if (in[j] < 0) continue; 3791 #if defined(PETSC_USE_DEBUG) 3792 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3793 #endif 3794 else { 3795 if (mat->was_assembled) { 3796 if (!baij->colmap) { 3797 ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3798 } 3799 3800 #if defined(PETSC_USE_DEBUG) 3801 #if defined (PETSC_USE_CTABLE) 3802 { PetscInt data; 3803 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3804 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3805 } 3806 #else 3807 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3808 #endif 3809 #endif 3810 #if defined (PETSC_USE_CTABLE) 3811 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3812 col = (col - 1)/bs; 3813 #else 3814 col = (baij->colmap[in[j]] - 1)/bs; 3815 #endif 3816 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3817 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3818 col = in[j]; 3819 } 3820 } 3821 else col = in[j]; 3822 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3823 } 3824 } 3825 } else { 3826 if (!baij->donotstash) { 3827 if (roworiented) { 3828 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3829 } else { 3830 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3831 } 3832 } 3833 } 3834 } 3835 3836 /* task normally handled by MatSetValuesBlocked() */ 3837 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3838 PetscFunctionReturn(0); 3839 } 3840
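
/*
   Illustrative sketch (kept in a comment, not compiled) of how MatLoad_MPIBAIJ() above is
   typically reached from user code.  The file name "matrix.dat" and the variable names are
   placeholders; the calling sequences follow the conventions used elsewhere in this file
   (MatLoad(Mat,PetscViewer) and object-style destroy calls), and the block size can be
   chosen at run time with -matload_block_size <bs>:

     PetscViewer viewer;
     Mat         A;

     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
        ... use the matrix, then ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/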