1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 
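    For example (purely illustrative numbers): with bs = 2 and garray = {3,7} on this
    process, the array version below sets colmap[3] = 1 and colmap[7] = 3 (i*bs+1, i.e.
    stored 1-based so that a value of 0 means "this global block column is not present
    locally"); callers then recover the local point column as colmap[gcol/bs] - 1 + gcol%bs.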
85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 111 { \ 112 \ 113 brow = row/bs; \ 114 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 115 rmax = aimax[brow]; nrow = ailen[brow]; \ 116 bcol = col/bs; \ 117 ridx = row % bs; cidx = col % bs; \ 118 low = 0; high = nrow; \ 119 while (high-low > 3) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > bcol) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > bcol) break; \ 126 if (rp[_i] == bcol) { \ 127 bap = ap + bs2*_i + bs*cidx + ridx; \ 128 if (addv == ADD_VALUES) *bap += value; \ 129 else *bap = value; \ 130 goto a_noinsert; \ 131 } \ 132 } \ 133 if (a->nonew == 1) goto a_noinsert; \ 134 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 136 N = nrow++ - 1; \ 137 /* shift up all the later entries in this row */ \ 138 for (ii=N; ii>=_i; ii--) { \ 139 rp[ii+1] = rp[ii]; \ 140 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141 } \ 142 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 143 rp[_i] = bcol; \ 144 ap[bs2*_i + bs*cidx + ridx] = value; \ 145 a_noinsert:; \ 146 ailen[brow] = nrow; \ 147 } 148 149 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 150 { \ 151 brow = row/bs; \ 152 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 153 rmax = bimax[brow]; nrow = bilen[brow]; \ 154 bcol = col/bs; \ 155 ridx = row % bs; cidx = col % bs; \ 156 low = 0; high = nrow; \ 157 while (high-low > 3) { \ 158 t = (low+high)/2; \ 159 if (rp[t] > bcol) high = t; \ 160 else low = t; \ 161 } \ 162 for (_i=low; _i<high; _i++) { \ 163 if (rp[_i] > bcol) break; \ 164 if (rp[_i] == bcol) { \ 165 bap = ap + bs2*_i + bs*cidx + ridx; \ 166 if (addv == ADD_VALUES) *bap += value; \ 167 else *bap = value; \ 168 goto b_noinsert; \ 169 } \ 170 } \ 171 if (b->nonew == 1) goto b_noinsert; \ 172 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 173 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 174 CHKMEMQ;\ 175 N = nrow++ - 1; \ 176 /* shift up all the later entries in this row */ \ 177 for (ii=N; ii>=_i; ii--) { \ 178 rp[ii+1] = rp[ii]; \ 179 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 180 } \ 181 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 182 rp[_i] = bcol; 
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:;  \
    bilen[brow] = nrow;  \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__
"MatSetValuesBlocked_MPIBAIJ" 277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 278 { 279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscTruth roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) { 320 barray[jj] = value[jj]; 321 } 322 barray += bs; 323 } 324 barray -= bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 387 { 388 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 389 PetscTruth roworiented = baij->roworiented; 390 PetscErrorCode ierr; 391 PetscInt i,j,row,col; 392 PetscInt rstart_orig=mat->rmap->rstart; 393 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 394 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 395 PetscReal tmp; 396 MatScalar **HD = baij->hd,value; 397 #if defined(PETSC_USE_DEBUG) 398 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 399 #endif 400 401 PetscFunctionBegin; 402 if (v) PetscValidScalarPointer(v,6); 403 for (i=0; i<m; i++) { 404 #if defined(PETSC_USE_DEBUG) 405 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 406 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 407 #endif 408 row = im[i]; 409 if (row >= rstart_orig && row < rend_orig) { 410 for (j=0; j<n; j++) { 411 col = in[j]; 412 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 413 /* Look up PetscInto the Hash Table */ 414 key = (row/bs)*Nbs+(col/bs)+1; 415 h1 = HASH(size,key,tmp); 416 417 418 idx = h1; 419 #if defined(PETSC_USE_DEBUG) 420 insert_ct++; 421 total_ct++; 422 if (HT[idx] != key) { 423 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 424 if (idx == size) { 425 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 426 if (idx == h1) { 427 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 428 } 429 } 430 } 431 #else 432 if (HT[idx] != key) { 433 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 434 if (idx == size) { 435 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 436 if (idx == h1) { 437 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 438 } 439 } 440 } 441 #endif 442 /* A HASH table entry is found, so insert the values at the correct address */ 443 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 444 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 445 } 446 } else { 447 if (!baij->donotstash) { 448 if (roworiented) { 449 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 450 } else { 451 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 452 } 453 } 454 } 455 } 456 #if defined(PETSC_USE_DEBUG) 457 baij->ht_total_ct = total_ct; 458 baij->ht_insert_ct = insert_ct; 459 #endif 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 466 { 467 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 468 PetscTruth roworiented = baij->roworiented; 469 PetscErrorCode ierr; 470 PetscInt i,j,ii,jj,row,col; 471 PetscInt rstart=baij->rstartbs; 472 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 473 PetscInt 
h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 474 PetscReal tmp; 475 MatScalar **HD = baij->hd,*baij_a; 476 const PetscScalar *v_t,*value; 477 #if defined(PETSC_USE_DEBUG) 478 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 479 #endif 480 481 PetscFunctionBegin; 482 483 if (roworiented) { 484 stepval = (n-1)*bs; 485 } else { 486 stepval = (m-1)*bs; 487 } 488 for (i=0; i<m; i++) { 489 #if defined(PETSC_USE_DEBUG) 490 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 491 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 492 #endif 493 row = im[i]; 494 v_t = v + i*nbs2; 495 if (row >= rstart && row < rend) { 496 for (j=0; j<n; j++) { 497 col = in[j]; 498 499 /* Look up into the Hash Table */ 500 key = row*Nbs+col+1; 501 h1 = HASH(size,key,tmp); 502 503 idx = h1; 504 #if defined(PETSC_USE_DEBUG) 505 total_ct++; 506 insert_ct++; 507 if (HT[idx] != key) { 508 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == size) { 510 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 511 if (idx == h1) { 512 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 513 } 514 } 515 } 516 #else 517 if (HT[idx] != key) { 518 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 519 if (idx == size) { 520 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 521 if (idx == h1) { 522 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 523 } 524 } 525 } 526 #endif 527 baij_a = HD[idx]; 528 if (roworiented) { 529 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 530 /* value = v + (i*(stepval+bs)+j)*bs; */ 531 value = v_t; 532 v_t += bs; 533 if (addv == ADD_VALUES) { 534 for (ii=0; ii<bs; ii++,value+=stepval) { 535 for (jj=ii; jj<bs2; jj+=bs) { 536 baij_a[jj] += *value++; 537 } 538 } 539 } else { 540 for (ii=0; ii<bs; ii++,value+=stepval) { 541 for (jj=ii; jj<bs2; jj+=bs) { 542 baij_a[jj] = *value++; 543 } 544 } 545 } 546 } else { 547 value = v + j*(stepval+bs)*bs + i*bs; 548 if (addv == ADD_VALUES) { 549 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 550 for (jj=0; jj<bs; jj++) { 551 baij_a[jj] += *value++; 552 } 553 } 554 } else { 555 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 556 for (jj=0; jj<bs; jj++) { 557 baij_a[jj] = *value++; 558 } 559 } 560 } 561 } 562 } 563 } else { 564 if (!baij->donotstash) { 565 if (roworiented) { 566 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } else { 568 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 569 } 570 } 571 } 572 } 573 #if defined(PETSC_USE_DEBUG) 574 baij->ht_total_ct = total_ct; 575 baij->ht_insert_ct = insert_ct; 576 #endif 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatGetValues_MPIBAIJ" 582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 583 { 584 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 585 PetscErrorCode ierr; 586 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 587 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 588 589 PetscFunctionBegin; 590 for (i=0; i<m; i++) { 591 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 592 if (idxm[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 593 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 594 row = idxm[i] - bsrstart; 595 for (j=0; j<n; j++) { 596 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 597 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 598 if (idxn[j] >= bscstart && idxn[j] < bscend){ 599 col = idxn[j] - bscstart; 600 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 601 } else { 602 if (!baij->colmap) { 603 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 604 } 605 #if defined (PETSC_USE_CTABLE) 606 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 607 data --; 608 #else 609 data = baij->colmap[idxn[j]/bs]-1; 610 #endif 611 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 612 else { 613 col = data + idxn[j]%bs; 614 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 615 } 616 } 617 } 618 } else { 619 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 } 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatNorm_MPIBAIJ" 627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 628 { 629 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 630 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 633 PetscReal sum = 0.0; 634 MatScalar *v; 635 636 PetscFunctionBegin; 637 if (baij->size == 1) { 638 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 639 } else { 640 if (type == NORM_FROBENIUS) { 641 v = amat->a; 642 nz = amat->nz*bs2; 643 for (i=0; i<nz; i++) { 644 #if defined(PETSC_USE_COMPLEX) 645 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 646 #else 647 sum += (*v)*(*v); v++; 648 #endif 649 } 650 v = bmat->a; 651 nz = bmat->nz*bs2; 652 for (i=0; i<nz; i++) { 653 #if defined(PETSC_USE_COMPLEX) 654 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 660 *nrm = sqrt(*nrm); 661 } else if (type == NORM_1) { /* max column sum */ 662 PetscReal *tmp,*tmp2; 663 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 664 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 665 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 666 v = amat->a; jj = amat->j; 667 for (i=0; i<amat->nz; i++) { 668 for (j=0; j<bs; j++){ 669 col = bs*(cstart + *jj) + j; /* column index */ 670 for (row=0; row<bs; row++){ 671 tmp[col] += PetscAbsScalar(*v); v++; 672 } 673 } 674 jj++; 675 } 676 v = bmat->a; jj = bmat->j; 677 for (i=0; i<bmat->nz; i++) { 678 for (j=0; j<bs; j++){ 679 col = bs*garray[*jj] + j; 680 for (row=0; row<bs; row++){ 681 tmp[col] += PetscAbsScalar(*v); v++; 682 } 683 } 684 jj++; 685 } 686 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 687 *nrm = 0.0; 688 for (j=0; j<mat->cmap->N; j++) { 689 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 690 } 691 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 692 } else if (type == NORM_INFINITY) { /* max row sum */ 693 PetscReal *sums; 694 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 695 
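      /* Illustrative summary of the loop below: for each local block row j, the bs
         point-row sums are accumulated from both the diagonal (A) and off-diagonal (B)
         blocks, the largest local row sum is kept in sum, and the global infinity norm
         is then obtained with an MPI_MAX reduction. */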
sum = 0.0; 696 for (j=0; j<amat->mbs; j++) { 697 for (row=0; row<bs; row++) sums[row] = 0.0; 698 v = amat->a + bs2*amat->i[j]; 699 nz = amat->i[j+1]-amat->i[j]; 700 for (i=0; i<nz; i++) { 701 for (col=0; col<bs; col++){ 702 for (row=0; row<bs; row++){ 703 sums[row] += PetscAbsScalar(*v); v++; 704 } 705 } 706 } 707 v = bmat->a + bs2*bmat->i[j]; 708 nz = bmat->i[j+1]-bmat->i[j]; 709 for (i=0; i<nz; i++) { 710 for (col=0; col<bs; col++){ 711 for (row=0; row<bs; row++){ 712 sums[row] += PetscAbsScalar(*v); v++; 713 } 714 } 715 } 716 for (row=0; row<bs; row++){ 717 if (sums[row] > sum) sum = sums[row]; 718 } 719 } 720 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 721 ierr = PetscFree(sums);CHKERRQ(ierr); 722 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Creates the hash table, and sets the table 729 This table is created only once. 730 If new entried need to be added to the matrix 731 then the hash table has to be destroyed and 732 recreated. 733 */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 737 { 738 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 739 Mat A = baij->A,B=baij->B; 740 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 741 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 742 PetscErrorCode ierr; 743 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 744 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 745 PetscInt *HT,key; 746 MatScalar **HD; 747 PetscReal tmp; 748 #if defined(PETSC_USE_INFO) 749 PetscInt ct=0,max=0; 750 #endif 751 752 PetscFunctionBegin; 753 if (baij->ht) PetscFunctionReturn(0); 754 755 baij->ht_size = (PetscInt)(factor*nz); 756 ht_size = baij->ht_size; 757 758 /* Allocate Memory for Hash Table */ 759 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 760 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 761 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 762 HD = baij->hd; 763 HT = baij->ht; 764 765 /* Loop Over A */ 766 for (i=0; i<a->mbs; i++) { 767 for (j=ai[i]; j<ai[i+1]; j++) { 768 row = i+rstart; 769 col = aj[j]+cstart; 770 771 key = row*Nbs + col + 1; 772 h1 = HASH(ht_size,key,tmp); 773 for (k=0; k<ht_size; k++){ 774 if (!HT[(h1+k)%ht_size]) { 775 HT[(h1+k)%ht_size] = key; 776 HD[(h1+k)%ht_size] = a->a + j*bs2; 777 break; 778 #if defined(PETSC_USE_INFO) 779 } else { 780 ct++; 781 #endif 782 } 783 } 784 #if defined(PETSC_USE_INFO) 785 if (k> max) max = k; 786 #endif 787 } 788 } 789 /* Loop Over B */ 790 for (i=0; i<b->mbs; i++) { 791 for (j=bi[i]; j<bi[i+1]; j++) { 792 row = i+rstart; 793 col = garray[bj[j]]; 794 key = row*Nbs + col + 1; 795 h1 = HASH(ht_size,key,tmp); 796 for (k=0; k<ht_size; k++){ 797 if (!HT[(h1+k)%ht_size]) { 798 HT[(h1+k)%ht_size] = key; 799 HD[(h1+k)%ht_size] = b->a + j*bs2; 800 break; 801 #if defined(PETSC_USE_INFO) 802 } else { 803 ct++; 804 #endif 805 } 806 } 807 #if defined(PETSC_USE_INFO) 808 if (k> max) max = k; 809 #endif 810 } 811 } 812 813 /* Print Summary */ 814 #if defined(PETSC_USE_INFO) 815 for (i=0,j=0; i<ht_size; i++) { 816 if (HT[i]) {j++;} 817 } 818 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 819 #endif 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 826 { 827 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 828 PetscErrorCode ierr; 829 PetscInt nstash,reallocs; 830 InsertMode addv; 831 832 PetscFunctionBegin; 833 if (baij->donotstash || mat->nooffprocentries) { 834 PetscFunctionReturn(0); 835 } 836 837 /* make sure all processors are either in INSERTMODE or ADDMODE */ 838 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 839 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 840 mat->insertmode = addv; /* in case this processor had no cache */ 841 842 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 843 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 844 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 845 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 846 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 847 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 854 { 855 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 856 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 857 PetscErrorCode ierr; 858 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 859 PetscInt *row,*col; 860 PetscTruth r1,r2,r3,other_disassembled; 861 MatScalar *val; 862 InsertMode addv = mat->insertmode; 863 PetscMPIInt n; 864 865 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 866 PetscFunctionBegin; 867 if (!baij->donotstash && !mat->nooffprocentries) { 868 while (1) { 869 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 870 if (!flg) break; 871 872 for (i=0; i<n;) { 873 /* Now identify the consecutive vals belonging to the same row */ 874 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 875 if (j < n) ncols = j-i; 876 else ncols = n-i; 877 /* Now assemble all these values with a single function call */ 878 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 879 i = j; 880 } 881 } 882 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 883 /* Now process the block-stash. 
Since the values are stashed column-oriented, 884 set the roworiented flag to column oriented, and after MatSetValues() 885 restore the original flags */ 886 r1 = baij->roworiented; 887 r2 = a->roworiented; 888 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 889 baij->roworiented = PETSC_FALSE; 890 a->roworiented = PETSC_FALSE; 891 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 892 while (1) { 893 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 894 if (!flg) break; 895 896 for (i=0; i<n;) { 897 /* Now identify the consecutive vals belonging to the same row */ 898 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 899 if (j < n) ncols = j-i; 900 else ncols = n-i; 901 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 902 i = j; 903 } 904 } 905 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 906 baij->roworiented = r1; 907 a->roworiented = r2; 908 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 909 } 910 911 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 912 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 913 914 /* determine if any processor has disassembled, if so we must 915 also disassemble ourselfs, in order that we may reassemble. */ 916 /* 917 if nonzero structure of submatrix B cannot change then we know that 918 no processor disassembled thus we can skip this stuff 919 */ 920 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 921 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 922 if (mat->was_assembled && !other_disassembled) { 923 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 924 } 925 } 926 927 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 928 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 929 } 930 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 931 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 933 934 #if defined(PETSC_USE_INFO) 935 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 936 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 937 baij->ht_total_ct = 0; 938 baij->ht_insert_ct = 0; 939 } 940 #endif 941 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 942 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 943 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 944 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 945 } 946 947 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 948 baij->rowvalues = 0; 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 955 { 956 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 957 PetscErrorCode ierr; 958 PetscMPIInt size = baij->size,rank = baij->rank; 959 PetscInt bs = mat->rmap->bs; 960 PetscTruth iascii,isdraw; 961 PetscViewer sviewer; 962 PetscViewerFormat format; 963 964 PetscFunctionBegin; 965 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 966 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 967 if (iascii) { 968 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 969 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 970 MatInfo info; 971 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 972 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 973 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 974 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 979 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 980 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 981 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } else if (format == PETSC_VIEWER_ASCII_INFO) { 984 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 987 PetscFunctionReturn(0); 988 } 989 } 990 991 if (isdraw) { 992 PetscDraw draw; 993 PetscTruth isnull; 994 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 995 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 996 } 997 998 if (size == 1) { 999 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1000 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1001 } else { 1002 /* assemble the entire matrix onto first processor. */ 1003 Mat A; 1004 Mat_SeqBAIJ *Aloc; 1005 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1006 MatScalar *a; 1007 1008 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1009 /* Perhaps this should be the type of mat? 
*/ 1010 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1011 if (!rank) { 1012 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1013 } else { 1014 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1015 } 1016 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1017 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1018 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1019 1020 /* copy over the A part */ 1021 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1022 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1023 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1024 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = (baij->cstartbs+aj[j])*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 /* copy over the B part */ 1037 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1038 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1039 for (i=0; i<mbs; i++) { 1040 rvals[0] = bs*(baij->rstartbs + i); 1041 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1042 for (j=ai[i]; j<ai[i+1]; j++) { 1043 col = baij->garray[aj[j]]*bs; 1044 for (k=0; k<bs; k++) { 1045 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1046 col++; a += bs; 1047 } 1048 } 1049 } 1050 ierr = PetscFree(rvals);CHKERRQ(ierr); 1051 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 /* 1054 Everyone has to call to draw the matrix since the graphics waits are 1055 synchronized across all processors that share the PetscDraw object 1056 */ 1057 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1058 if (!rank) { 1059 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1060 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1061 } 1062 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1063 ierr = MatDestroy(A);CHKERRQ(ierr); 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatView_MPIBAIJ" 1070 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1071 { 1072 PetscErrorCode ierr; 1073 PetscTruth iascii,isdraw,issocket,isbinary; 1074 1075 PetscFunctionBegin; 1076 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1077 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1078 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1079 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1080 if (iascii || isdraw || issocket || isbinary) { 1081 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1082 } else { 1083 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1090 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1091 { 1092 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 #if defined(PETSC_USE_LOG) 1097 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1098 #endif 1099 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/ 1211 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1212 PetscFunctionReturn(0); 1213 } 1214 1215 /* 1216 This only works correctly for square matrices where the subblock A->A is the 1217 diagonal block 1218 */ 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1221 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1222 { 1223 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 1226 PetscFunctionBegin; 1227 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1228 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatScale_MPIBAIJ" 1234 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1235 { 1236 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1241 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1247 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1248 { 1249 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1250 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1251 PetscErrorCode ierr; 1252 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1253 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1254 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1255 1256 PetscFunctionBegin; 1257 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1258 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1259 mat->getrowactive = PETSC_TRUE; 1260 1261 if (!mat->rowvalues && (idx || v)) { 1262 /* 1263 allocate enough space to hold information from the longest row. 
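       (Illustrative note: max below is the largest combined number of block entries in
        any local block row of A and B, so one allocation of max*bs2 values and indices
        is always large enough to return a single expanded point row.)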
1264 */ 1265 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1266 PetscInt max = 1,mbs = mat->mbs,tmp; 1267 for (i=0; i<mbs; i++) { 1268 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1269 if (max < tmp) { max = tmp; } 1270 } 1271 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1272 } 1273 lrow = row - brstart; 1274 1275 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1276 if (!v) {pvA = 0; pvB = 0;} 1277 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1278 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1279 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1280 nztot = nzA + nzB; 1281 1282 cmap = mat->garray; 1283 if (v || idx) { 1284 if (nztot) { 1285 /* Sort by increasing column numbers, assuming A and B already sorted */ 1286 PetscInt imark = -1; 1287 if (v) { 1288 *v = v_p = mat->rowvalues; 1289 for (i=0; i<nzB; i++) { 1290 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1291 else break; 1292 } 1293 imark = i; 1294 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1295 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1296 } 1297 if (idx) { 1298 *idx = idx_p = mat->rowindices; 1299 if (imark > -1) { 1300 for (i=0; i<imark; i++) { 1301 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1302 } 1303 } else { 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]/bs] < cstart) 1306 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1307 else break; 1308 } 1309 imark = i; 1310 } 1311 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1312 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1313 } 1314 } else { 1315 if (idx) *idx = 0; 1316 if (v) *v = 0; 1317 } 1318 } 1319 *nz = nztot; 1320 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1321 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1327 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1328 { 1329 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1330 1331 PetscFunctionBegin; 1332 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1333 baij->getrowactive = PETSC_FALSE; 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1339 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1340 { 1341 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1346 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1352 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1353 { 1354 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1355 Mat A = a->A,B = a->B; 1356 PetscErrorCode ierr; 1357 PetscReal isend[5],irecv[5]; 1358 1359 PetscFunctionBegin; 1360 info->block_size = (PetscReal)matin->rmap->bs; 1361 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1362 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1363 isend[3] = info->memory; isend[4] = info->mallocs; 1364 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1365 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1366 isend[3] += 
info->memory; isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj =
Aloc->j; a = Aloc->a; 1465 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1466 1467 for (i=0; i<mbs; i++) { 1468 rvals[0] = bs*(baij->rstartbs + i); 1469 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1470 for (j=ai[i]; j<ai[i+1]; j++) { 1471 col = (baij->cstartbs+aj[j])*bs; 1472 for (k=0; k<bs; k++) { 1473 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1474 col++; a += bs; 1475 } 1476 } 1477 } 1478 /* copy over the B part */ 1479 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1480 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1481 for (i=0; i<mbs; i++) { 1482 rvals[0] = bs*(baij->rstartbs + i); 1483 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1484 for (j=ai[i]; j<ai[i+1]; j++) { 1485 col = baij->garray[aj[j]]*bs; 1486 for (k=0; k<bs; k++) { 1487 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1488 col++; a += bs; 1489 } 1490 } 1491 } 1492 ierr = PetscFree(rvals);CHKERRQ(ierr); 1493 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1494 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1495 1496 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1497 *matout = B; 1498 } else { 1499 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1506 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1507 { 1508 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1509 Mat a = baij->A,b = baij->B; 1510 PetscErrorCode ierr; 1511 PetscInt s1,s2,s3; 1512 1513 PetscFunctionBegin; 1514 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1515 if (rr) { 1516 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1517 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1518 /* Overlap communication with computation. 
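       (The forward scatter of rr into baij->lvec is started here, the diagonal block is
        scaled while those messages are in flight, and the scatter is completed further
        below before the off-diagonal block is right-scaled with lvec.)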
*/ 1519 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 } 1521 if (ll) { 1522 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1523 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1524 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1525 } 1526 /* scale the diagonal block */ 1527 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1528 1529 if (rr) { 1530 /* Do a scatter end and then right scale the off-diagonal block */ 1531 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1532 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1533 } 1534 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1540 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1541 { 1542 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1543 PetscErrorCode ierr; 1544 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1545 PetscInt i,*owners = A->rmap->range; 1546 PetscInt *nprocs,j,idx,nsends,row; 1547 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1548 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1549 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1550 MPI_Comm comm = ((PetscObject)A)->comm; 1551 MPI_Request *send_waits,*recv_waits; 1552 MPI_Status recv_status,*send_status; 1553 #if defined(PETSC_DEBUG) 1554 PetscTruth found = PETSC_FALSE; 1555 #endif 1556 1557 PetscFunctionBegin; 1558 /* first count number of contributors to each processor */ 1559 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1560 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1561 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1562 j = 0; 1563 for (i=0; i<N; i++) { 1564 if (lastidx > (idx = rows[i])) j = 0; 1565 lastidx = idx; 1566 for (; j<size; j++) { 1567 if (idx >= owners[j] && idx < owners[j+1]) { 1568 nprocs[2*j]++; 1569 nprocs[2*j+1] = 1; 1570 owner[i] = j; 1571 #if defined(PETSC_DEBUG) 1572 found = PETSC_TRUE; 1573 #endif 1574 break; 1575 } 1576 } 1577 #if defined(PETSC_DEBUG) 1578 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1579 found = PETSC_FALSE; 1580 #endif 1581 } 1582 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1583 1584 if (A->nooffproczerorows) { 1585 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1586 nrecvs = nsends; 1587 nmax = N; 1588 } else { 1589 /* inform other processors of number of messages and max length*/ 1590 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1591 } 1592 1593 /* post receives: */ 1594 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1595 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1596 for (i=0; i<nrecvs; i++) { 1597 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1598 } 1599 1600 /* do sends: 1601 1) starts[i] gives the starting index in svalues for stuff going to 1602 the ith processor 1603 */ 1604 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1605 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1606 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1607 
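  /* Illustrative summary of the packing below: starts[] is first built as per-process
     offsets into svalues, used as a moving cursor while the row indices are copied to
     their owning process's segment, then rebuilt before the MPI_Isend() calls. */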
starts[0] = 0; 1608 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1609 for (i=0; i<N; i++) { 1610 svalues[starts[owner[i]]++] = rows[i]; 1611 } 1612 1613 starts[0] = 0; 1614 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1615 count = 0; 1616 for (i=0; i<size; i++) { 1617 if (nprocs[2*i+1]) { 1618 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1619 } 1620 } 1621 ierr = PetscFree(starts);CHKERRQ(ierr); 1622 1623 base = owners[rank]; 1624 1625 /* wait on receives */ 1626 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1627 count = nrecvs; 1628 slen = 0; 1629 while (count) { 1630 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1631 /* unpack receives into our local space */ 1632 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1633 source[imdex] = recv_status.MPI_SOURCE; 1634 lens[imdex] = n; 1635 slen += n; 1636 count--; 1637 } 1638 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1639 1640 /* move the data into the send scatter */ 1641 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1642 count = 0; 1643 for (i=0; i<nrecvs; i++) { 1644 values = rvalues + i*nmax; 1645 for (j=0; j<lens[i]; j++) { 1646 lrows[count++] = values[j] - base; 1647 } 1648 } 1649 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1650 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1651 ierr = PetscFree(owner);CHKERRQ(ierr); 1652 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1653 1654 /* actually zap the local rows */ 1655 /* 1656 Zero the required rows. If the "diagonal block" of the matrix 1657 is square and the user wishes to set the diagonal we use separate 1658 code so that MatSetValues() is not called for each diagonal allocating 1659 new memory, thus calling lots of mallocs and slowing things down. 
1660 1661 */ 1662 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1663 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1664 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1665 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1666 } else if (diag != 0.0) { 1667 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1668 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1669 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1670 for (i=0; i<slen; i++) { 1671 row = lrows[i] + rstart_bs; 1672 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1673 } 1674 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1675 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1676 } else { 1677 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1678 } 1679 1680 ierr = PetscFree(lrows);CHKERRQ(ierr); 1681 1682 /* wait on sends */ 1683 if (nsends) { 1684 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1685 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1686 ierr = PetscFree(send_status);CHKERRQ(ierr); 1687 } 1688 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1689 ierr = PetscFree(svalues);CHKERRQ(ierr); 1690 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1696 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1697 { 1698 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatEqual_MPIBAIJ" 1710 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1711 { 1712 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1713 Mat a,b,c,d; 1714 PetscTruth flg; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 a = matA->A; b = matA->B; 1719 c = matB->A; d = matB->B; 1720 1721 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1722 if (flg) { 1723 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1724 } 1725 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatCopy_MPIBAIJ" 1731 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1732 { 1733 PetscErrorCode ierr; 1734 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1735 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1736 1737 PetscFunctionBegin; 1738 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1739 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1740 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1741 } else { 1742 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1743 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1744 } 1745 PetscFunctionReturn(0); 1746 } 1747 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1750 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1751 { 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1761 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1762 { 1763 PetscErrorCode ierr; 1764 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1765 PetscBLASInt bnz,one=1; 1766 Mat_SeqBAIJ *x,*y; 1767 1768 PetscFunctionBegin; 1769 if (str == SAME_NONZERO_PATTERN) { 1770 PetscScalar alpha = a; 1771 x = (Mat_SeqBAIJ *)xx->A->data; 1772 y = (Mat_SeqBAIJ *)yy->A->data; 1773 bnz = PetscBLASIntCast(x->nz); 1774 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1775 x = (Mat_SeqBAIJ *)xx->B->data; 1776 y = (Mat_SeqBAIJ *)yy->B->data; 1777 bnz = PetscBLASIntCast(x->nz); 1778 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1779 } else { 1780 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1781 } 1782 PetscFunctionReturn(0); 1783 } 1784 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1787 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1788 { 1789 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1790 PetscInt rbs,cbs; 1791 PetscErrorCode ierr; 1792 1793 PetscFunctionBegin; 1794 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1795 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1796 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1797 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1798 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1799 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1800 PetscFunctionReturn(0); 1801 } 1802 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1805 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1806 { 1807 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1808 PetscErrorCode ierr; 1809 1810 PetscFunctionBegin; 1811 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1812 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1818 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1819 { 1820 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1825 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1831 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1832 { 1833 PetscErrorCode ierr; 1834 IS iscol_local; 1835 PetscInt csize; 1836 1837 PetscFunctionBegin; 1838 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1839 if (call == MAT_REUSE_MATRIX) { 1840 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1841 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 1842 } else { 1843 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1844 } 1845 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1846 if (call == MAT_INITIAL_MATRIX) { 1847 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1848 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1849 } 1850 PetscFunctionReturn(0); 1851 } 1852 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1855 /* 1856 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1857 in local and then by concatenating the local matrices the end result. 1858 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1859 */ 1860 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1861 { 1862 PetscErrorCode ierr; 1863 PetscMPIInt rank,size; 1864 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1865 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1866 Mat *local,M,Mreuse; 1867 MatScalar *vwork,*aa; 1868 MPI_Comm comm = ((PetscObject)mat)->comm; 1869 Mat_SeqBAIJ *aij; 1870 1871 1872 PetscFunctionBegin; 1873 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1874 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1875 1876 if (call == MAT_REUSE_MATRIX) { 1877 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1878 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1879 local = &Mreuse; 1880 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1881 } else { 1882 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1883 Mreuse = *local; 1884 ierr = PetscFree(local);CHKERRQ(ierr); 1885 } 1886 1887 /* 1888 m - number of local rows 1889 n - number of columns (same on all processors) 1890 rstart - first row in new global matrix generated 1891 */ 1892 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1893 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1894 m = m/bs; 1895 n = n/bs; 1896 1897 if (call == MAT_INITIAL_MATRIX) { 1898 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1899 ii = aij->i; 1900 jj = aij->j; 1901 1902 /* 1903 Determine the number of non-zeros in the diagonal and off-diagonal 1904 portions of the matrix in order to do correct preallocation 1905 */ 1906 1907 /* first get start and end of "diagonal" columns */ 1908 if (csize == PETSC_DECIDE) { 1909 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1910 if (mglobal == n*bs) { /* square matrix */ 1911 nlocal = m; 1912 } else { 1913 nlocal = n/size + ((n % size) > rank); 1914 } 1915 } else { 1916 nlocal = csize/bs; 1917 } 1918 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1919 rstart = rend - nlocal; 1920 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1921 1922 /* next, compute all the lengths */ 1923 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1924 olens = dlens + m; 1925 for (i=0; i<m; i++) { 1926 jend = ii[i+1] - ii[i]; 1927 olen = 0; 1928 dlen = 0; 1929 for (j=0; j<jend; j++) { 1930 if (*jj < rstart || *jj >= rend) olen++; 1931 else dlen++; 1932 jj++; 1933 } 1934 olens[i] = olen; 1935 dlens[i] = dlen; 1936 } 1937 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1938 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 
1939 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1940 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1941 ierr = PetscFree(dlens);CHKERRQ(ierr); 1942 } else { 1943 PetscInt ml,nl; 1944 1945 M = *newmat; 1946 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1947 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1948 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1949 /* 1950 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1951 rather than the slower MatSetValues(). 1952 */ 1953 M->was_assembled = PETSC_TRUE; 1954 M->assembled = PETSC_FALSE; 1955 } 1956 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1957 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1958 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1959 ii = aij->i; 1960 jj = aij->j; 1961 aa = aij->a; 1962 for (i=0; i<m; i++) { 1963 row = rstart/bs + i; 1964 nz = ii[i+1] - ii[i]; 1965 cwork = jj; jj += nz; 1966 vwork = aa; aa += nz; 1967 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1968 } 1969 1970 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1971 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1972 *newmat = M; 1973 1974 /* save submatrix used in processor for next request */ 1975 if (call == MAT_INITIAL_MATRIX) { 1976 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1977 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1978 } 1979 1980 PetscFunctionReturn(0); 1981 } 1982 1983 #undef __FUNCT__ 1984 #define __FUNCT__ "MatPermute_MPIBAIJ" 1985 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1986 { 1987 MPI_Comm comm,pcomm; 1988 PetscInt first,local_size,nrows; 1989 const PetscInt *rows; 1990 PetscMPIInt size; 1991 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1996 /* make a collective version of 'rowp' */ 1997 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1998 if (pcomm==comm) { 1999 crowp = rowp; 2000 } else { 2001 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2002 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2003 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2004 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2005 } 2006 /* collect the global row permutation and invert it */ 2007 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2008 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2009 if (pcomm!=comm) { 2010 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2011 } 2012 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2013 /* get the local target indices */ 2014 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2015 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2016 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2017 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2018 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2019 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2020 /* the column permutation is so much easier; 2021 make a local version of 'colp' and invert it */ 2022 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2023 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2024 if (size==1) { 2025 lcolp = colp; 2026 } else { 2027 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2028 ierr = 
ISGetIndices(colp,&rows);CHKERRQ(ierr); 2029 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2030 } 2031 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2032 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2033 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2034 if (size>1) { 2035 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2036 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2037 } 2038 /* now we just get the submatrix */ 2039 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2040 /* clean up */ 2041 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2042 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2048 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2049 { 2050 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2051 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2052 2053 PetscFunctionBegin; 2054 if (nghosts) { *nghosts = B->nbs;} 2055 if (ghosts) {*ghosts = baij->garray;} 2056 PetscFunctionReturn(0); 2057 } 2058 2059 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2060 2061 #undef __FUNCT__ 2062 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2063 /* 2064 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2065 */ 2066 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2067 { 2068 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2069 PetscErrorCode ierr; 2070 PetscMPIInt size,*ncolsonproc,*disp,nn; 2071 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2072 const PetscInt *is; 2073 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2074 PetscInt *rowhit,M,cstart,cend,colb; 2075 PetscInt *columnsforrow,l; 2076 IS *isa; 2077 PetscTruth done,flg; 2078 ISLocalToGlobalMapping map = mat->bmapping; 2079 PetscInt *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2080 2081 PetscFunctionBegin; 2082 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2083 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2084 2085 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2086 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2087 M = mat->rmap->n/bs; 2088 cstart = mat->cmap->rstart/bs; 2089 cend = mat->cmap->rend/bs; 2090 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2091 c->N = mat->cmap->N/bs; 2092 c->m = mat->rmap->n/bs; 2093 c->rstart = mat->rmap->rstart/bs; 2094 2095 c->ncolors = nis; 2096 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2097 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2098 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2099 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2100 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2101 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2102 2103 /* Allow access to data structures of local part of matrix */ 2104 if (!baij->colmap) { 2105 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2106 } 2107 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2108 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2109 2110 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2111 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2112 2113 for (i=0; i<nis; i++) { 2114 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2115 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2116 c->ncolumns[i] = n; 2117 if (n) { 2118 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2119 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2120 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2121 } else { 2122 c->columns[i] = 0; 2123 } 2124 2125 if (ctype == IS_COLORING_GLOBAL){ 2126 /* Determine the total (parallel) number of columns of this color */ 2127 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2128 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2129 2130 nn = PetscMPIIntCast(n); 2131 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2132 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2133 if (!nctot) { 2134 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2135 } 2136 2137 disp[0] = 0; 2138 for (j=1; j<size; j++) { 2139 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2140 } 2141 2142 /* Get complete list of columns for color on each processor */ 2143 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2145 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2146 } else if (ctype == IS_COLORING_GHOSTED){ 2147 /* Determine local number of columns of this color on this process, including ghost points */ 2148 nctot = n; 2149 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2150 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2151 } else { 2152 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2153 } 2154 2155 /* 2156 Mark all rows affect by these columns 2157 */ 2158 /* Temporary option to allow for debugging/testing */ 2159 flg = PETSC_FALSE; 2160 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2161 if (!flg) {/*-----------------------------------------------------------------------------*/ 2162 /* crude, fast version */ 2163 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2164 /* loop over columns*/ 2165 for (j=0; j<nctot; j++) { 2166 if (ctype == IS_COLORING_GHOSTED) { 2167 col = ltog[cols[j]]; 2168 } else { 2169 col = cols[j]; 2170 } 2171 if (col >= cstart && col < cend) { 2172 /* column is in diagonal block of matrix */ 2173 rows = A_cj + A_ci[col-cstart]; 2174 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2175 } else { 2176 #if defined (PETSC_USE_CTABLE) 2177 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2178 colb --; 2179 #else 2180 colb = baij->colmap[col] - 1; 2181 #endif 2182 if (colb == -1) { 2183 m = 0; 2184 } else { 2185 colb = colb/bs; 2186 rows = B_cj + B_ci[colb]; 2187 m = B_ci[colb+1] - B_ci[colb]; 2188 } 2189 } 2190 /* loop over columns marking them in rowhit */ 2191 for (k=0; k<m; k++) { 2192 rowhit[*rows++] = col + 1; 2193 } 2194 } 2195 2196 /* count the number of hits */ 2197 nrows = 0; 2198 for (j=0; j<M; j++) { 2199 if (rowhit[j]) nrows++; 2200 } 2201 c->nrows[i] = nrows; 2202 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2203 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2204 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2205 nrows = 0; 2206 for (j=0; j<M; j++) { 2207 if (rowhit[j]) { 2208 c->rows[i][nrows] = j; 2209 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2210 nrows++; 2211 } 2212 } 2213 } else {/*-------------------------------------------------------------------------------*/ 2214 /* slow version, using rowhit as a linked list */ 2215 PetscInt currentcol,fm,mfm; 2216 rowhit[M] = M; 2217 nrows = 0; 2218 /* loop over columns*/ 2219 for (j=0; j<nctot; j++) { 2220 if (ctype == IS_COLORING_GHOSTED) { 2221 col = ltog[cols[j]]; 2222 } else { 2223 col = cols[j]; 2224 } 2225 if (col >= cstart && col < cend) { 2226 /* column is in diagonal block of matrix */ 2227 rows = A_cj + A_ci[col-cstart]; 2228 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2229 } else { 2230 #if defined (PETSC_USE_CTABLE) 2231 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2232 colb --; 2233 #else 2234 colb = baij->colmap[col] - 1; 2235 #endif 2236 if (colb == -1) { 2237 m = 0; 2238 } else { 2239 colb = colb/bs; 2240 rows = B_cj + B_ci[colb]; 2241 m = B_ci[colb+1] - B_ci[colb]; 2242 } 2243 } 2244 2245 /* loop over columns marking them in rowhit */ 2246 fm = M; /* fm points to first entry in linked list */ 2247 for (k=0; k<m; k++) { 2248 currentcol = *rows++; 2249 /* is it already in the list? 
*/ 2250 do { 2251 mfm = fm; 2252 fm = rowhit[fm]; 2253 } while (fm < currentcol); 2254 /* not in list so add it */ 2255 if (fm != currentcol) { 2256 nrows++; 2257 columnsforrow[currentcol] = col; 2258 /* next three lines insert new entry into linked list */ 2259 rowhit[mfm] = currentcol; 2260 rowhit[currentcol] = fm; 2261 fm = currentcol; 2262 /* fm points to present position in list since we know the columns are sorted */ 2263 } else { 2264 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2265 } 2266 } 2267 } 2268 c->nrows[i] = nrows; 2269 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2270 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2271 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2272 /* now store the linked list of rows into c->rows[i] */ 2273 nrows = 0; 2274 fm = rowhit[M]; 2275 do { 2276 c->rows[i][nrows] = fm; 2277 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2278 fm = rowhit[fm]; 2279 } while (fm < M); 2280 } /* ---------------------------------------------------------------------------------------*/ 2281 ierr = PetscFree(cols);CHKERRQ(ierr); 2282 } 2283 2284 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2285 /* 2286 vscale will contain the "diagonal" (on-process) scalings followed by the off-process ones 2287 */ 2288 if (ctype == IS_COLORING_GLOBAL) { 2289 PetscInt *garray; 2290 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2291 for (i=0; i<baij->B->cmap->n/bs; i++) { 2292 for (j=0; j<bs; j++) { 2293 garray[i*bs+j] = bs*baij->garray[i]+j; 2294 } 2295 } 2296 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2297 ierr = PetscFree(garray);CHKERRQ(ierr); 2298 CHKMEMQ; 2299 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2300 for (k=0; k<c->ncolors; k++) { 2301 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2302 for (l=0; l<c->nrows[k]; l++) { 2303 col = c->columnsforrow[k][l]; 2304 if (col >= cstart && col < cend) { 2305 /* column is in diagonal block of matrix */ 2306 colb = col - cstart; 2307 } else { 2308 /* column is in "off-processor" part */ 2309 #if defined (PETSC_USE_CTABLE) 2310 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2311 colb --; 2312 #else 2313 colb = baij->colmap[col] - 1; 2314 #endif 2315 colb = colb/bs; 2316 colb += cend - cstart; 2317 } 2318 c->vscaleforrow[k][l] = colb; 2319 } 2320 } 2321 } else if (ctype == IS_COLORING_GHOSTED) { 2322 /* Get gtol mapping */ 2323 PetscInt N = mat->cmap->N, *gtol; 2324 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2325 for (i=0; i<N; i++) gtol[i] = -1; 2326 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2327 2328 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2329 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2330 for (k=0; k<c->ncolors; k++) { 2331 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2332 for (l=0; l<c->nrows[k]; l++) { 2333 col = c->columnsforrow[k][l]; /* global column index */ 2334 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2335 } 2336 } 2337 ierr = PetscFree(gtol);CHKERRQ(ierr); 2338 } 2339 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2340 2341 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2342 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2343
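  /* done with the column-oriented views of the diagonal (A) and off-diagonal (B) blocks obtained with MatGetColumnIJ() above; restore them */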
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2344 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2345 CHKMEMQ; 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2351 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2352 { 2353 Mat B; 2354 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2355 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2356 Mat_SeqAIJ *b; 2357 PetscErrorCode ierr; 2358 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2359 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2360 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2361 2362 PetscFunctionBegin; 2363 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2364 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2365 2366 /* ---------------------------------------------------------------- 2367 Tell every processor the number of nonzeros per row 2368 */ 2369 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2370 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2371 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2372 } 2373 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2374 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2375 displs = recvcounts + size; 2376 for (i=0; i<size; i++) { 2377 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2378 displs[i] = A->rmap->range[i]/bs; 2379 } 2380 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2381 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2382 #else 2383 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2384 #endif 2385 /* --------------------------------------------------------------- 2386 Create the sequential matrix of the same type as the local block diagonal 2387 */ 2388 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2389 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2390 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2391 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2392 b = (Mat_SeqAIJ *)B->data; 2393 2394 /*-------------------------------------------------------------------- 2395 Copy my part of matrix column indices over 2396 */ 2397 sendcount = ad->nz + bd->nz; 2398 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2399 a_jsendbuf = ad->j; 2400 b_jsendbuf = bd->j; 2401 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2402 cnt = 0; 2403 for (i=0; i<n; i++) { 2404 2405 /* put in lower diagonal portion */ 2406 m = bd->i[i+1] - bd->i[i]; 2407 while (m > 0) { 2408 /* is it above diagonal (in bd (compressed) numbering) */ 2409 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2410 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2411 m--; 2412 } 2413 2414 /* put in diagonal portion */ 2415 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2416 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2417 } 2418 2419 /* put in upper diagonal portion */ 2420 while (m-- > 0) { 2421 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2422 } 2423 } 2424 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2425 2426 /*-------------------------------------------------------------------- 2427 Gather all column indices to all processors 2428 */ 2429 for (i=0; i<size; i++) { 2430 recvcounts[i] = 0; 2431 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2432 recvcounts[i] += lens[j]; 2433 } 2434 } 2435 displs[0] = 0; 2436 for (i=1; i<size; i++) { 2437 displs[i] = displs[i-1] + recvcounts[i-1]; 2438 } 2439 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2440 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2441 #else 2442 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2443 #endif 2444 /*-------------------------------------------------------------------- 2445 Assemble the matrix into useable form (note numerical values not yet set) 2446 */ 2447 /* set the b->ilen (length of each row) values */ 2448 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2449 /* set the b->i indices */ 2450 b->i[0] = 0; 2451 for (i=1; i<=A->rmap->N/bs; i++) { 2452 b->i[i] = b->i[i-1] + lens[i-1]; 2453 } 2454 ierr = PetscFree(lens);CHKERRQ(ierr); 2455 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2456 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2457 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2458 2459 if (A->symmetric){ 2460 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2461 } else if (A->hermitian) { 2462 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2463 } else if (A->structurally_symmetric) { 2464 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2465 } 2466 *newmat = B; 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatSOR_MPIBAIJ" 2472 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2473 { 2474 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2475 PetscErrorCode ierr; 2476 Vec bb1 = 0; 2477 2478 PetscFunctionBegin; 2479 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2480 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2481 } 2482 2483 if (flag == SOR_APPLY_UPPER) { 2484 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2489 if (flag & SOR_ZERO_INITIAL_GUESS) { 2490 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2491 its--; 2492 } 2493 2494 while (its--) { 2495 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2496 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2497 2498 /* update rhs: bb1 = bb - B*x */ 2499 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2500 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2501 2502 /* local sweep */ 2503 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2504 } 2505 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2506 if (flag & SOR_ZERO_INITIAL_GUESS) { 2507 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2508 its--; 2509 } 2510 while (its--) { 2511 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2512 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2513 2514 /* update rhs: bb1 = bb - B*x */ 2515 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2516 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2517 2518 /* local sweep */ 2519 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2520 } 2521 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2522 if (flag & SOR_ZERO_INITIAL_GUESS) { 2523 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2524 its--; 2525 } 2526 while (its--) { 2527 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2528 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2529 2530 /* update rhs: bb1 = bb - B*x */ 2531 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2532 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2533 2534 /* local sweep */ 2535 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2536 } 2537 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2538 2539 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2540 PetscFunctionReturn(0); 2541 } 2542 2543 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2544 2545 2546 /* -------------------------------------------------------------------*/ 2547 static struct _MatOps MatOps_Values = { 2548 MatSetValues_MPIBAIJ, 2549 MatGetRow_MPIBAIJ, 2550 MatRestoreRow_MPIBAIJ, 2551 MatMult_MPIBAIJ, 2552 /* 4*/ MatMultAdd_MPIBAIJ, 2553 MatMultTranspose_MPIBAIJ, 2554 MatMultTransposeAdd_MPIBAIJ, 2555 0, 2556 0, 2557 0, 2558 /*10*/ 0, 2559 0, 2560 0, 2561 MatSOR_MPIBAIJ, 2562 MatTranspose_MPIBAIJ, 2563 /*15*/ MatGetInfo_MPIBAIJ, 2564 MatEqual_MPIBAIJ, 2565 MatGetDiagonal_MPIBAIJ, 2566 MatDiagonalScale_MPIBAIJ, 2567 MatNorm_MPIBAIJ, 2568 /*20*/ MatAssemblyBegin_MPIBAIJ, 2569 MatAssemblyEnd_MPIBAIJ, 2570 MatSetOption_MPIBAIJ, 2571 MatZeroEntries_MPIBAIJ, 2572 /*24*/ MatZeroRows_MPIBAIJ, 2573 0, 2574 0, 2575 0, 2576 0, 2577 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2578 0, 2579 0, 2580 0, 2581 0, 2582 /*34*/ MatDuplicate_MPIBAIJ, 2583 0, 2584 0, 2585 0, 2586 0, 2587 /*39*/ MatAXPY_MPIBAIJ, 2588 MatGetSubMatrices_MPIBAIJ, 2589 MatIncreaseOverlap_MPIBAIJ, 2590 MatGetValues_MPIBAIJ, 2591 MatCopy_MPIBAIJ, 2592 /*44*/ 0, 2593 MatScale_MPIBAIJ, 2594 0, 2595 0, 2596 0, 2597 /*49*/ MatSetBlockSize_MPIBAIJ, 2598 0, 2599 0, 2600 0, 2601 0, 2602 /*54*/ MatFDColoringCreate_MPIBAIJ, 2603 0, 2604 MatSetUnfactored_MPIBAIJ, 2605 MatPermute_MPIBAIJ, 2606 MatSetValuesBlocked_MPIBAIJ, 2607 /*59*/ MatGetSubMatrix_MPIBAIJ, 2608 MatDestroy_MPIBAIJ, 2609 MatView_MPIBAIJ, 2610 0, 2611 0, 2612 /*64*/ 0, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2618 0, 2619 0, 2620 0, 2621 0, 2622 /*74*/ 0, 2623 MatFDColoringApply_BAIJ, 2624 0, 2625 0, 2626 0, 2627 /*79*/ 0, 2628 0, 2629 0, 2630 0, 2631 MatLoad_MPIBAIJ, 2632 /*84*/ 0, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /*89*/ 0, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /*94*/ 0, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /*99*/ 0, 2648 0, 2649 0, 2650 0, 2651 0, 2652 /*104*/0, 2653 MatRealPart_MPIBAIJ, 2654 MatImaginaryPart_MPIBAIJ, 2655 0, 2656 0, 2657 /*109*/0, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2663 0, 2664 MatGetGhosts_MPIBAIJ, 2665 0, 2666 0, 2667 /*119*/0, 2668 0, 2669 0, 2670 0 
2671 }; 2672 2673 EXTERN_C_BEGIN 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2676 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2677 { 2678 PetscFunctionBegin; 2679 *a = ((Mat_MPIBAIJ *)A->data)->A; 2680 *iscopy = PETSC_FALSE; 2681 PetscFunctionReturn(0); 2682 } 2683 EXTERN_C_END 2684 2685 EXTERN_C_BEGIN 2686 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2687 EXTERN_C_END 2688 2689 EXTERN_C_BEGIN 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2692 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2693 { 2694 PetscInt m,rstart,cstart,cend; 2695 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2696 const PetscInt *JJ=0; 2697 PetscScalar *values=0; 2698 PetscErrorCode ierr; 2699 2700 PetscFunctionBegin; 2701 2702 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2703 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2704 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2705 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2706 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2707 m = B->rmap->n/bs; 2708 rstart = B->rmap->rstart/bs; 2709 cstart = B->cmap->rstart/bs; 2710 cend = B->cmap->rend/bs; 2711 2712 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2713 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2714 for (i=0; i<m; i++) { 2715 nz = ii[i+1] - ii[i]; 2716 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2717 nz_max = PetscMax(nz_max,nz); 2718 JJ = jj + ii[i]; 2719 for (j=0; j<nz; j++) { 2720 if (*JJ >= cstart) break; 2721 JJ++; 2722 } 2723 d = 0; 2724 for (; j<nz; j++) { 2725 if (*JJ++ >= cend) break; 2726 d++; 2727 } 2728 d_nnz[i] = d; 2729 o_nnz[i] = nz - d; 2730 } 2731 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2732 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2733 2734 values = (PetscScalar*)V; 2735 if (!values) { 2736 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2737 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2738 } 2739 for (i=0; i<m; i++) { 2740 PetscInt row = i + rstart; 2741 PetscInt ncols = ii[i+1] - ii[i]; 2742 const PetscInt *icols = jj + ii[i]; 2743 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2744 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2745 } 2746 2747 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2748 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2749 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2750 2751 PetscFunctionReturn(0); 2752 } 2753 EXTERN_C_END 2754 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2757 /*@C 2758 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format 2759 (block compressed row). 2760 2761 Collective on MPI_Comm 2762 2763 Input Parameters: 2764 + A - the matrix . bs - the block size 2765 . i - the indices into j for the start of each local row (starts with zero) 2766 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2767 - v - optional values in the matrix 2768 2769 Level: developer 2770 2771 .keywords: matrix, aij, compressed row, sparse, parallel 2772 2773 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2774 @*/ 2775 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2776 { 2777 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2778 2779 PetscFunctionBegin; 2780 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2781 if (f) { 2782 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2783 } 2784 PetscFunctionReturn(0); 2785 } 2786 2787 EXTERN_C_BEGIN 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2790 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2791 { 2792 Mat_MPIBAIJ *b; 2793 PetscErrorCode ierr; 2794 PetscInt i, newbs = PetscAbs(bs); 2795 2796 PetscFunctionBegin; 2797 if (bs < 0) { 2798 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2799 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2800 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2801 bs = PetscAbs(bs); 2802 } 2803 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2804 bs = newbs; 2805 2806 2807 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2808 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2809 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2810 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2811 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2812 2813 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2814 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2815 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2816 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2817 2818 if (d_nnz) { 2819 for (i=0; i<B->rmap->n/bs; i++) { 2820 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2821 } 2822 } 2823 if (o_nnz) { 2824 for (i=0; i<B->rmap->n/bs; i++) { 2825 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2826 } 2827 } 2828 2829 b = (Mat_MPIBAIJ*)B->data; 2830 b->bs2 = bs*bs; 2831 b->mbs = B->rmap->n/bs; 2832 b->nbs = B->cmap->n/bs; 2833 b->Mbs = B->rmap->N/bs; 2834 b->Nbs = B->cmap->N/bs; 2835 2836 for (i=0; i<=b->size; i++) { 2837 b->rangebs[i] = B->rmap->range[i]/bs; 2838 } 2839 b->rstartbs = B->rmap->rstart/bs; 2840 b->rendbs = B->rmap->rend/bs; 2841 b->cstartbs = B->cmap->rstart/bs; 2842 b->cendbs = B->cmap->rend/bs; 2843 2844 if (!B->preallocated) { 2845 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2846 ierr = 
MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2847 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2848 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2849 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2850 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2851 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2852 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2853 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2854 } 2855 2856 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2857 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2858 B->preallocated = PETSC_TRUE; 2859 PetscFunctionReturn(0); 2860 } 2861 EXTERN_C_END 2862 2863 EXTERN_C_BEGIN 2864 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2865 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2866 EXTERN_C_END 2867 2868 2869 EXTERN_C_BEGIN 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2872 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2873 { 2874 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2875 PetscErrorCode ierr; 2876 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2877 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2878 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2879 2880 PetscFunctionBegin; 2881 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2882 ii[0] = 0; 2883 CHKMEMQ; 2884 for (i=0; i<M; i++) { 2885 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2886 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2887 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2888 /* remove one from count of matrix has diagonal */ 2889 for (j=id[i]; j<id[i+1]; j++) { 2890 if (jd[j] == i) {ii[i+1]--;break;} 2891 } 2892 CHKMEMQ; 2893 } 2894 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2895 cnt = 0; 2896 for (i=0; i<M; i++) { 2897 for (j=io[i]; j<io[i+1]; j++) { 2898 if (garray[jo[j]] > rstart) break; 2899 jj[cnt++] = garray[jo[j]]; 2900 CHKMEMQ; 2901 } 2902 for (k=id[i]; k<id[i+1]; k++) { 2903 if (jd[k] != i) { 2904 jj[cnt++] = rstart + jd[k]; 2905 CHKMEMQ; 2906 } 2907 } 2908 for (;j<io[i+1]; j++) { 2909 jj[cnt++] = garray[jo[j]]; 2910 CHKMEMQ; 2911 } 2912 } 2913 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2914 PetscFunctionReturn(0); 2915 } 2916 EXTERN_C_END 2917 2918 EXTERN_C_BEGIN 2919 #if defined(PETSC_HAVE_MUMPS) 2920 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2921 #endif 2922 EXTERN_C_END 2923 2924 /*MC 2925 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2926 2927 Options Database Keys: 2928 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2929 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2930 - -mat_use_hash_table <fact> 2931 2932 Level: beginner 2933 2934 .seealso: MatCreateMPIBAIJ 2935 M*/ 2936 2937 EXTERN_C_BEGIN 2938 #undef __FUNCT__ 2939 #define __FUNCT__ "MatCreate_MPIBAIJ" 2940 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2941 { 2942 Mat_MPIBAIJ *b; 2943 PetscErrorCode ierr; 2944 PetscTruth flg; 2945 2946 PetscFunctionBegin; 2947 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2948 B->data = (void*)b; 2949 2950 2951 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2952 B->mapping = 0; 2953 B->assembled = PETSC_FALSE; 2954 2955 B->insertmode = NOT_SET_VALUES; 2956 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2957 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2958 2959 /* build local table of row and column ownerships */ 2960 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2961 2962 /* build cache for off array entries formed */ 2963 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2964 b->donotstash = PETSC_FALSE; 2965 b->colmap = PETSC_NULL; 2966 b->garray = PETSC_NULL; 2967 b->roworiented = PETSC_TRUE; 2968 2969 /* stuff used in block assembly */ 2970 b->barray = 0; 2971 2972 /* stuff used for matrix vector multiply */ 2973 b->lvec = 0; 2974 b->Mvctx = 0; 2975 2976 /* stuff for MatGetRow() */ 2977 b->rowindices = 0; 2978 b->rowvalues = 0; 2979 b->getrowactive = PETSC_FALSE; 2980 2981 /* hash table stuff */ 2982 b->ht = 0; 2983 b->hd = 0; 2984 b->ht_size = 0; 2985 b->ht_flag = PETSC_FALSE; 2986 b->ht_fact = 0; 2987 b->ht_total_ct = 0; 2988 b->ht_insert_ct = 0; 2989 2990 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2991 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2992 if (flg) { 2993 PetscReal fact = 1.39; 2994 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2995 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2996 if (fact <= 1.0) fact = 1.39; 2997 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2998 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2999 } 3000 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3001 3002 #if defined(PETSC_HAVE_MUMPS) 3003 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3004 #endif 3005 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3006 "MatConvert_MPIBAIJ_MPIAdj", 3007 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3008 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3009 "MatStoreValues_MPIBAIJ", 3010 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3011 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3012 "MatRetrieveValues_MPIBAIJ", 3013 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3014 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3015 "MatGetDiagonalBlock_MPIBAIJ", 3016 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3017 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3018 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3019 
MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3020 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3021 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3022 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3023 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3024 "MatDiagonalScaleLocal_MPIBAIJ", 3025 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3026 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3027 "MatSetHashTableFactor_MPIBAIJ", 3028 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3029 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3030 PetscFunctionReturn(0); 3031 } 3032 EXTERN_C_END 3033 3034 /*MC 3035 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3036 3037 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3038 and MATMPIBAIJ otherwise. 3039 3040 Options Database Keys: 3041 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3042 3043 Level: beginner 3044 3045 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3046 M*/ 3047 3048 EXTERN_C_BEGIN 3049 #undef __FUNCT__ 3050 #define __FUNCT__ "MatCreate_BAIJ" 3051 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3052 { 3053 PetscErrorCode ierr; 3054 PetscMPIInt size; 3055 3056 PetscFunctionBegin; 3057 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3058 if (size == 1) { 3059 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3060 } else { 3061 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3062 } 3063 PetscFunctionReturn(0); 3064 } 3065 EXTERN_C_END 3066 3067 #undef __FUNCT__ 3068 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3069 /*@C 3070 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3071 (block compressed row). For good matrix assembly performance 3072 the user should preallocate the matrix storage by setting the parameters 3073 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3074 performance can be increased by more than a factor of 50. 3075 3076 Collective on Mat 3077 3078 Input Parameters: 3079 + A - the matrix 3080 . bs - size of block 3081 . d_nz - number of block nonzeros per block row in diagonal portion of local 3082 submatrix (same for all local rows) 3083 . d_nnz - array containing the number of block nonzeros in the various block rows 3084 in the diagonal portion of the local submatrix (possibly different for each block 3085 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3086 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3087 submatrix (same for all local rows). 3088 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3089 off-diagonal portion of the local submatrix (possibly different for 3090 each block row) or PETSC_NULL. 3091 3092 If the *_nnz parameter is given then the *_nz parameter is ignored 3093 3094 Options Database Keys: 3095 + -mat_block_size - size of the blocks to use 3096 - -mat_use_hash_table <fact> - use a hash table when assembling the matrix (fact is used to size the hash table) 3097 3098 Notes: 3099 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3100 then it must be used on all processors that share the object for that argument.
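
   Example usage (a minimal sketch; the block size and per-block-row fill estimates below are
   illustrative only, and comm, m, n are assumed to be set by the caller with m and n multiples
   of the block size):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); /* bs=2, d_nz=5, o_nz=2 */
.ve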
3101 3102 Storage Information: 3103 For a square global matrix we define each processor's diagonal portion 3104 to be its local rows and the corresponding columns (a square submatrix); 3105 each processor's off-diagonal portion encompasses the remainder of the 3106 local matrix (a rectangular submatrix). 3107 3108 The user can specify preallocated storage for the diagonal part of 3109 the local submatrix with either d_nz or d_nnz (not both). Set 3110 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3111 memory allocation. Likewise, specify preallocated storage for the 3112 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3113 3114 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3115 the figure below we depict these three local rows and all columns (0-11). 3116 3117 .vb 3118 0 1 2 3 4 5 6 7 8 9 10 11 3119 ------------------- 3120 row 3 | o o o d d d o o o o o o 3121 row 4 | o o o d d d o o o o o o 3122 row 5 | o o o d d d o o o o o o 3123 ------------------- 3124 .ve 3125 3126 Thus, any entries in the d locations are stored in the d (diagonal) 3127 submatrix, and any entries in the o locations are stored in the 3128 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3129 stored simply in the MATSEQBAIJ format for compressed row storage. 3130 3131 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3132 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3133 In general, for PDE problems in which most nonzeros are near the diagonal, 3134 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3135 or you will get TERRIBLE performance; see the users' manual chapter on 3136 matrices. 3137 3138 You can call MatGetInfo() to get information on how effective the preallocation was; 3139 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3140 You can also run with the option -info and look for messages with the string 3141 malloc in them to see if additional memory allocation was needed. 3142 3143 Level: intermediate 3144 3145 .keywords: matrix, block, aij, compressed row, sparse, parallel 3146 3147 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3148 @*/ 3149 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3150 { 3151 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3152 3153 PetscFunctionBegin; 3154 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3155 if (f) { 3156 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3157 } 3158 PetscFunctionReturn(0); 3159 } 3160 3161 #undef __FUNCT__ 3162 #define __FUNCT__ "MatCreateMPIBAIJ" 3163 /*@C 3164 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3165 (block compressed row). For good matrix assembly performance 3166 the user should preallocate the matrix storage by setting the parameters 3167 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3168 performance can be increased by more than a factor of 50. 3169 3170 Collective on MPI_Comm 3171 3172 Input Parameters: 3173 + comm - MPI communicator 3174 . bs - size of blockk 3175 . 
m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3176 This value should be the same as the local size used in creating the 3177 y vector for the matrix-vector product y = Ax. 3178 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 3179 This value should be the same as the local size used in creating the 3180 x vector for the matrix-vector product y = Ax. 3181 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3182 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3183 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3184 submatrix (same for all local rows) 3185 . d_nnz - array containing the number of nonzero blocks in the various block rows 3186 in the diagonal portion of the local submatrix (possibly different for each block 3187 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3188 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3189 submatrix (same for all local rows). 3190 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3191 off-diagonal portion of the local submatrix (possibly different for 3192 each block row) or PETSC_NULL. 3193 3194 Output Parameter: 3195 . A - the matrix 3196 3197 Options Database Keys: 3198 + -mat_block_size - size of the blocks to use 3199 - -mat_use_hash_table <fact> 3200 3201 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3202 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3203 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3204 3205 Notes: 3206 If the *_nnz parameter is given then the *_nz parameter is ignored 3207 3208 A nonzero block is any block that has 1 or more nonzeros in it 3209 3210 The user MUST specify either the local or global matrix dimensions 3211 (possibly both). 3212 3213 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3214 then it must be used on all processors that share the object for that argument. 3215 3216 Storage Information: 3217 For a square global matrix we define each processor's diagonal portion 3218 to be its local rows and the corresponding columns (a square submatrix); 3219 each processor's off-diagonal portion encompasses the remainder of the 3220 local matrix (a rectangular submatrix). 3221 3222 The user can specify preallocated storage for the diagonal part of 3223 the local submatrix with either d_nz or d_nnz (not both). Set 3224 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3225 memory allocation. Likewise, specify preallocated storage for the 3226 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3227 3228 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3229 the figure below we depict these three local rows and all columns (0-11). 3230 3231 .vb 3232 0 1 2 3 4 5 6 7 8 9 10 11 3233 ------------------- 3234 row 3 | o o o d d d o o o o o o 3235 row 4 | o o o d d d o o o o o o 3236 row 5 | o o o d d d o o o o o o 3237 ------------------- 3238 .ve 3239 3240 Thus, any entries in the d locations are stored in the d (diagonal) 3241 submatrix, and any entries in the o locations are stored in the 3242 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3243 stored simply in the MATSEQBAIJ format for compressed row storage.
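   As an illustrative sketch only: if the three rows depicted above are grouped into
   3x3 blocks (bs=3), the owning process has one diagonal block and three off-diagonal
   blocks in its single block row, so a corresponding call (with the global sizes left
   for PETSc to determine) could look like
.vb
      MatCreateMPIBAIJ(comm,3,3,3,PETSC_DETERMINE,PETSC_DETERMINE,1,PETSC_NULL,3,PETSC_NULL,&A);
.ve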
3244 3245 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3246 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3247 In general, for PDE problems in which most nonzeros are near the diagonal, 3248 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3249 or you will get TERRIBLE performance; see the users' manual chapter on 3250 matrices. 3251 3252 Level: intermediate 3253 3254 .keywords: matrix, block, aij, compressed row, sparse, parallel 3255 3256 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3257 @*/ 3258 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3259 { 3260 PetscErrorCode ierr; 3261 PetscMPIInt size; 3262 3263 PetscFunctionBegin; 3264 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3265 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3266 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3267 if (size > 1) { 3268 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3269 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3270 } else { 3271 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3272 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3273 } 3274 PetscFunctionReturn(0); 3275 } 3276 3277 #undef __FUNCT__ 3278 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3279 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3280 { 3281 Mat mat; 3282 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3283 PetscErrorCode ierr; 3284 PetscInt len=0; 3285 3286 PetscFunctionBegin; 3287 *newmat = 0; 3288 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3289 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3290 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3291 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3292 3293 mat->factortype = matin->factortype; 3294 mat->preallocated = PETSC_TRUE; 3295 mat->assembled = PETSC_TRUE; 3296 mat->insertmode = NOT_SET_VALUES; 3297 3298 a = (Mat_MPIBAIJ*)mat->data; 3299 mat->rmap->bs = matin->rmap->bs; 3300 a->bs2 = oldmat->bs2; 3301 a->mbs = oldmat->mbs; 3302 a->nbs = oldmat->nbs; 3303 a->Mbs = oldmat->Mbs; 3304 a->Nbs = oldmat->Nbs; 3305 3306 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3307 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3308 3309 a->size = oldmat->size; 3310 a->rank = oldmat->rank; 3311 a->donotstash = oldmat->donotstash; 3312 a->roworiented = oldmat->roworiented; 3313 a->rowindices = 0; 3314 a->rowvalues = 0; 3315 a->getrowactive = PETSC_FALSE; 3316 a->barray = 0; 3317 a->rstartbs = oldmat->rstartbs; 3318 a->rendbs = oldmat->rendbs; 3319 a->cstartbs = oldmat->cstartbs; 3320 a->cendbs = oldmat->cendbs; 3321 3322 /* hash table stuff */ 3323 a->ht = 0; 3324 a->hd = 0; 3325 a->ht_size = 0; 3326 a->ht_flag = oldmat->ht_flag; 3327 a->ht_fact = oldmat->ht_fact; 3328 a->ht_total_ct = 0; 3329 a->ht_insert_ct = 0; 3330 3331 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3332 if (oldmat->colmap) { 3333 #if defined (PETSC_USE_CTABLE) 3334 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3335 #else 3336 ierr = 
PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3337 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3338 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3339 #endif 3340 } else a->colmap = 0; 3341 3342 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3343 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3344 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3345 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3346 } else a->garray = 0; 3347 3348 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3349 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3350 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3351 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3352 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3353 3354 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3355 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3356 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3357 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3358 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3359 *newmat = mat; 3360 3361 PetscFunctionReturn(0); 3362 } 3363 3364 #undef __FUNCT__ 3365 #define __FUNCT__ "MatLoad_MPIBAIJ" 3366 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3367 { 3368 PetscErrorCode ierr; 3369 int fd; 3370 PetscInt i,nz,j,rstart,rend; 3371 PetscScalar *vals,*buf; 3372 MPI_Comm comm = ((PetscObject)viewer)->comm; 3373 MPI_Status status; 3374 PetscMPIInt rank,size,maxnz; 3375 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3376 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3377 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3378 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3379 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3380 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3381 3382 PetscFunctionBegin; 3383 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3384 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3385 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3386 3387 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3388 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3389 if (!rank) { 3390 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3391 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3392 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3393 } 3394 3395 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3396 3397 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3398 M = header[1]; N = header[2]; 3399 3400 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3401 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3402 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3403 3404 /* If global sizes are set, check if they are consistent with that given in the file */ 3405 if (sizesset) { 3406 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 
3407 } 3408 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3409 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3410 3411 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3412 3413 /* 3414 This code adds extra rows to make sure the number of rows is 3415 divisible by the blocksize 3416 */ 3417 Mbs = M/bs; 3418 extra_rows = bs - M + bs*Mbs; 3419 if (extra_rows == bs) extra_rows = 0; 3420 else Mbs++; 3421 if (extra_rows && !rank) { 3422 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3423 } 3424 3425 /* determine ownership of all rows */ 3426 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3427 mbs = Mbs/size + ((Mbs % size) > rank); 3428 m = mbs*bs; 3429 } else { /* User set */ 3430 m = newmat->rmap->n; 3431 mbs = m/bs; 3432 } 3433 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3434 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3435 3436 /* process 0 needs enough room for process with most rows */ 3437 if (!rank) { 3438 mmax = rowners[1]; 3439 for (i=2; i<size; i++) { 3440 mmax = PetscMax(mmax,rowners[i]); 3441 } 3442 mmax*=bs; 3443 } else mmax = m; 3444 3445 rowners[0] = 0; 3446 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3447 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3448 rstart = rowners[rank]; 3449 rend = rowners[rank+1]; 3450 3451 /* distribute row lengths to all processors */ 3452 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3453 if (!rank) { 3454 mend = m; 3455 if (size == 1) mend = mend - extra_rows; 3456 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3457 for (j=mend; j<m; j++) locrowlens[j] = 1; 3458 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3459 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3460 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3461 for (j=0; j<m; j++) { 3462 procsnz[0] += locrowlens[j]; 3463 } 3464 for (i=1; i<size; i++) { 3465 mend = browners[i+1] - browners[i]; 3466 if (i == size-1) mend = mend - extra_rows; 3467 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3468 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3469 /* calculate the number of nonzeros on each processor */ 3470 for (j=0; j<browners[i+1]-browners[i]; j++) { 3471 procsnz[i] += rowlengths[j]; 3472 } 3473 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3474 } 3475 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3476 } else { 3477 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3478 } 3479 3480 if (!rank) { 3481 /* determine max buffer needed and allocate it */ 3482 maxnz = procsnz[0]; 3483 for (i=1; i<size; i++) { 3484 maxnz = PetscMax(maxnz,procsnz[i]); 3485 } 3486 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3487 3488 /* read in my part of the matrix column indices */ 3489 nz = procsnz[0]; 3490 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3491 mycols = ibuf; 3492 if (size == 1) nz -= extra_rows; 3493 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3494 if (size == 1) for (i=0; i< extra_rows; i++) { 
mycols[nz+i] = M+i; } 3495 3496 /* read in every ones (except the last) and ship off */ 3497 for (i=1; i<size-1; i++) { 3498 nz = procsnz[i]; 3499 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3500 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3501 } 3502 /* read in the stuff for the last proc */ 3503 if (size != 1) { 3504 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3505 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3506 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3507 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3508 } 3509 ierr = PetscFree(cols);CHKERRQ(ierr); 3510 } else { 3511 /* determine buffer space needed for message */ 3512 nz = 0; 3513 for (i=0; i<m; i++) { 3514 nz += locrowlens[i]; 3515 } 3516 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3517 mycols = ibuf; 3518 /* receive message of column indices*/ 3519 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3520 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3521 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3522 } 3523 3524 /* loop over local rows, determining number of off diagonal entries */ 3525 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3526 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3527 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3528 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3529 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3530 rowcount = 0; nzcount = 0; 3531 for (i=0; i<mbs; i++) { 3532 dcount = 0; 3533 odcount = 0; 3534 for (j=0; j<bs; j++) { 3535 kmax = locrowlens[rowcount]; 3536 for (k=0; k<kmax; k++) { 3537 tmp = mycols[nzcount++]/bs; 3538 if (!mask[tmp]) { 3539 mask[tmp] = 1; 3540 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3541 else masked1[dcount++] = tmp; 3542 } 3543 } 3544 rowcount++; 3545 } 3546 3547 dlens[i] = dcount; 3548 odlens[i] = odcount; 3549 3550 /* zero out the mask elements we set */ 3551 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3552 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3553 } 3554 3555 3556 if (!sizesset) { 3557 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3558 } 3559 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3560 3561 if (!rank) { 3562 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3563 /* read in my part of the matrix numerical values */ 3564 nz = procsnz[0]; 3565 vals = buf; 3566 mycols = ibuf; 3567 if (size == 1) nz -= extra_rows; 3568 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3569 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3570 3571 /* insert into matrix */ 3572 jj = rstart*bs; 3573 for (i=0; i<m; i++) { 3574 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3575 mycols += locrowlens[i]; 3576 vals += locrowlens[i]; 3577 jj++; 3578 } 3579 /* read in other processors (except the last one) and ship out */ 3580 for (i=1; i<size-1; i++) { 3581 nz = procsnz[i]; 3582 vals = buf; 3583 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3584 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3585 } 3586 /* the last proc */ 3587 if (size != 1){ 3588 nz = procsnz[i] - extra_rows; 
3589 vals = buf; 3590 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3591 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3592 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3593 } 3594 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3595 } else { 3596 /* receive numeric values */ 3597 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3598 3599 /* receive message of values */ 3600 vals = buf; 3601 mycols = ibuf; 3602 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3603 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3604 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3605 3606 /* insert into matrix */ 3607 jj = rstart*bs; 3608 for (i=0; i<m; i++) { 3609 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3610 mycols += locrowlens[i]; 3611 vals += locrowlens[i]; 3612 jj++; 3613 } 3614 } 3615 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3616 ierr = PetscFree(buf);CHKERRQ(ierr); 3617 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3618 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3619 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3620 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3621 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3622 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3623 3624 PetscFunctionReturn(0); 3625 } 3626 3627 #undef __FUNCT__ 3628 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3629 /*@ 3630 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table. 3631 3632 Input Parameters: 3633 . mat - the matrix 3634 . fact - the hash table factor 3635 3636 Not Collective, each process can use a different factor 3637 3638 Level: advanced 3639 3640 Notes: 3641 This can also be set by the command line option: -mat_use_hash_table <fact> 3642 3643 .keywords: matrix, hashtable, factor, HT 3644 3645 .seealso: MatSetOption() 3646 @*/ 3647 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3648 { 3649 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3650 3651 PetscFunctionBegin; 3652 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3653 if (f) { 3654 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3655 } 3656 PetscFunctionReturn(0); 3657 } 3658 3659 EXTERN_C_BEGIN 3660 #undef __FUNCT__ 3661 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3662 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3663 { 3664 Mat_MPIBAIJ *baij; 3665 3666 PetscFunctionBegin; 3667 baij = (Mat_MPIBAIJ*)mat->data; 3668 baij->ht_fact = fact; 3669 PetscFunctionReturn(0); 3670 } 3671 EXTERN_C_END 3672 3673 #undef __FUNCT__ 3674 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3675 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3676 { 3677 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3678 PetscFunctionBegin; 3679 *Ad = a->A; 3680 *Ao = a->B; 3681 *colmap = a->garray; 3682 PetscFunctionReturn(0); 3683 } 3684 3685 /* 3686 Special version for direct calls from Fortran (to eliminate two function call overheads) 3687 */ 3688 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3689 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3690 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3691 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3692 #endif 3693 3694 #undef 
__FUNCT__ 3695 #define __FUNCT__ "matmpibaijsetvaluesblocked" 3696 /*@C 3697 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3698 3699 Collective on Mat 3700 3701 Input Parameters: 3702 + mat - the matrix 3703 . min - number of input rows 3704 . im - input rows 3705 . nin - number of input columns 3706 . in - input columns 3707 . v - numerical values input 3708 - addvin - INSERT_VALUES or ADD_VALUES 3709 3710 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3711 3712 Level: advanced 3713 3714 .seealso: MatSetValuesBlocked() 3715 @*/ 3716 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3717 { 3718 /* convert input arguments to C version */ 3719 Mat mat = *matin; 3720 PetscInt m = *min, n = *nin; 3721 InsertMode addv = *addvin; 3722 3723 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3724 const MatScalar *value; 3725 MatScalar *barray=baij->barray; 3726 PetscTruth roworiented = baij->roworiented; 3727 PetscErrorCode ierr; 3728 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3729 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3730 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3731 3732 PetscFunctionBegin; 3733 /* tasks normally handled by MatSetValuesBlocked() */ 3734 if (mat->insertmode == NOT_SET_VALUES) { 3735 mat->insertmode = addv; 3736 } 3737 #if defined(PETSC_USE_DEBUG) 3738 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3739 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3740 #endif 3741 if (mat->assembled) { 3742 mat->was_assembled = PETSC_TRUE; 3743 mat->assembled = PETSC_FALSE; 3744 } 3745 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3746 3747 3748 if (!barray) { 3749 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3750 baij->barray = barray; 3751 } 3752 3753 if (roworiented) { 3754 stepval = (n-1)*bs; 3755 } else { 3756 stepval = (m-1)*bs; 3757 } 3758 for (i=0; i<m; i++) { 3759 if (im[i] < 0) continue; 3760 #if defined(PETSC_USE_DEBUG) 3761 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3762 #endif 3763 if (im[i] >= rstart && im[i] < rend) { 3764 row = im[i] - rstart; 3765 for (j=0; j<n; j++) { 3766 /* If NumCol = 1 then a copy is not required */ 3767 if ((roworiented) && (n == 1)) { 3768 barray = (MatScalar*)v + i*bs2; 3769 } else if((!roworiented) && (m == 1)) { 3770 barray = (MatScalar*)v + j*bs2; 3771 } else { /* Here a copy is required */ 3772 if (roworiented) { 3773 value = v + i*(stepval+bs)*bs + j*bs; 3774 } else { 3775 value = v + j*(stepval+bs)*bs + i*bs; 3776 } 3777 for (ii=0; ii<bs; ii++,value+=stepval) { 3778 for (jj=0; jj<bs; jj++) { 3779 *barray++ = *value++; 3780 } 3781 } 3782 barray -=bs2; 3783 } 3784 3785 if (in[j] >= cstart && in[j] < cend){ 3786 col = in[j] - cstart; 3787 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3788 } 3789 else if (in[j] < 0) continue; 3790 #if defined(PETSC_USE_DEBUG) 3791 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3792 #endif 3793 else { 3794 if (mat->was_assembled) { 3795 if (!baij->colmap) { 3796 ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3797 } 3798 3799 #if defined(PETSC_USE_DEBUG) 3800 #if defined (PETSC_USE_CTABLE) 3801 { PetscInt data; 3802 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3803 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3804 } 3805 #else 3806 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3807 #endif 3808 #endif 3809 #if defined (PETSC_USE_CTABLE) 3810 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3811 col = (col - 1)/bs; 3812 #else 3813 col = (baij->colmap[in[j]] - 1)/bs; 3814 #endif 3815 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3816 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3817 col = in[j]; 3818 } 3819 } 3820 else col = in[j]; 3821 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3822 } 3823 } 3824 } else { 3825 if (!baij->donotstash) { 3826 if (roworiented) { 3827 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3828 } else { 3829 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3830 } 3831 } 3832 } 3833 } 3834 3835 /* task normally handled by MatSetValuesBlocked() */ 3836 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3837 PetscFunctionReturn(0); 3838 } 3839
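
/*
   Illustrative sketch (not part of the original source): after a MATMPIBAIJ matrix A
   has been assembled, its local diagonal block Ad, off-diagonal block Ao, and the
   global block-column indices of Ao can be inspected with MatMPIBAIJGetSeqBAIJ().
   Ad and Ao are the internal sequential BAIJ matrices owned by A and must not be
   destroyed by the caller.

      Mat      Ad,Ao;
      PetscInt *garray;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/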