1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 111 { \ 112 \ 113 brow = row/bs; \ 114 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 115 rmax = aimax[brow]; nrow = ailen[brow]; \ 116 bcol = col/bs; \ 117 ridx = row % bs; cidx = col % bs; \ 118 low = 0; high = nrow; \ 119 while (high-low > 3) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > bcol) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > bcol) break; \ 126 if (rp[_i] == bcol) { \ 127 bap = ap + bs2*_i + bs*cidx + ridx; \ 128 if (addv == ADD_VALUES) *bap += value; \ 129 else *bap = value; \ 130 goto a_noinsert; \ 131 } \ 132 } \ 133 if (a->nonew == 1) goto a_noinsert; \ 134 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 136 N = nrow++ - 1; \ 137 /* shift up all the later entries in this row */ \ 138 for (ii=N; ii>=_i; ii--) { \ 139 rp[ii+1] = rp[ii]; \ 140 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141 } \ 142 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 143 rp[_i] = bcol; \ 144 ap[bs2*_i + bs*cidx + ridx] = value; \ 145 a_noinsert:; \ 146 ailen[brow] = nrow; \ 147 } 148 149 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 150 { \ 151 brow = row/bs; \ 152 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 153 rmax = bimax[brow]; nrow = bilen[brow]; \ 154 bcol = col/bs; \ 155 ridx = row % bs; cidx = col % bs; \ 156 low = 0; high = nrow; \ 157 while (high-low > 3) { \ 158 t = (low+high)/2; \ 159 if (rp[t] > bcol) high = t; \ 160 else low = t; \ 161 } \ 162 for (_i=low; _i<high; _i++) { \ 163 if (rp[_i] > bcol) break; \ 164 if (rp[_i] == bcol) { \ 165 bap = ap + bs2*_i + bs*cidx + ridx; \ 166 if (addv == ADD_VALUES) *bap += value; \ 167 else *bap = value; \ 168 goto b_noinsert; \ 169 } \ 170 } \ 171 if (b->nonew == 1) goto b_noinsert; \ 172 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 173 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 174 CHKMEMQ;\ 175 N = nrow++ - 1; \ 176 /* shift up all the later entries in this row */ \ 177 for (ii=N; ii>=_i; ii--) { \ 178 rp[ii+1] = rp[ii]; \ 179 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 180 } \ 181 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 182 rp[_i] = bcol; \ 183 ap[bs2*_i + bs*cidx + ridx] = value; \ 184 b_noinsert:; \ 185 bilen[brow] = nrow; \ 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatSetValues_MPIBAIJ" 190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 191 { 192 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 193 MatScalar value; 194 PetscTruth roworiented = baij->roworiented; 195 PetscErrorCode ierr; 196 PetscInt i,j,row,col; 197 PetscInt rstart_orig=mat->rmap->rstart; 198 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 199 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 200 201 /* Some Variables required in the macro */ 202 Mat A = baij->A; 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 204 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 205 MatScalar *aa=a->a; 206 207 Mat B = baij->B; 208 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 209 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 210 MatScalar *ba=b->a; 211 212 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 213 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 214 MatScalar *ap,*bap; 215 216 PetscFunctionBegin; 217 if (v) PetscValidScalarPointer(v,6); 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_DEBUG) 221 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 222 #endif 223 if (im[i] >= rstart_orig && im[i] < rend_orig) { 224 row = im[i] - rstart_orig; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart_orig && in[j] < cend_orig){ 227 col = in[j] - cstart_orig; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } else if (in[j] < 0) continue; 232 #if defined(PETSC_USE_DEBUG) 233 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1); 234 #endif 235 else { 236 if (mat->was_assembled) { 237 if (!baij->colmap) { 238 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 239 } 240 #if defined (PETSC_USE_CTABLE) 241 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 242 col = col - 1; 243 #else 244 col = baij->colmap[in[j]/bs] - 1; 245 #endif 246 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 247 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 248 col = in[j]; 249 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 250 B = baij->B; 251 b = (Mat_SeqBAIJ*)(B)->data; 252 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 253 ba=b->a; 254 } else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 if (roworiented) { 265 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 266 } else { 267 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 268 } 269 } 270 } 271 } 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 278 { 279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscTruth roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) { 320 barray[jj] = value[jj]; 321 } 322 barray += bs; 323 } 324 barray -= bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 387 { 388 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 389 PetscTruth roworiented = baij->roworiented; 390 PetscErrorCode ierr; 391 PetscInt i,j,row,col; 392 PetscInt rstart_orig=mat->rmap->rstart; 393 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 394 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 395 PetscReal tmp; 396 MatScalar **HD = baij->hd,value; 397 #if defined(PETSC_USE_DEBUG) 398 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 399 #endif 400 401 PetscFunctionBegin; 402 if (v) PetscValidScalarPointer(v,6); 403 for (i=0; i<m; i++) { 404 #if defined(PETSC_USE_DEBUG) 405 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 406 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 407 #endif 408 row = im[i]; 409 if (row >= rstart_orig && row < rend_orig) { 410 for (j=0; j<n; j++) { 411 col = in[j]; 412 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 413 /* Look up PetscInto the Hash Table */ 414 key = (row/bs)*Nbs+(col/bs)+1; 415 h1 = HASH(size,key,tmp); 416 417 418 idx = h1; 419 #if defined(PETSC_USE_DEBUG) 420 insert_ct++; 421 total_ct++; 422 if (HT[idx] != key) { 423 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 424 if (idx == size) { 425 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 426 if (idx == h1) { 427 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 428 } 429 } 430 } 431 #else 432 if (HT[idx] != key) { 433 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 434 if (idx == size) { 435 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 436 if (idx == h1) { 437 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 438 } 439 } 440 } 441 #endif 442 /* A HASH table entry is found, so insert the values at the correct address */ 443 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 444 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 445 } 446 } else { 447 if (!baij->donotstash) { 448 if (roworiented) { 449 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 450 } else { 451 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 452 } 453 } 454 } 455 } 456 #if defined(PETSC_USE_DEBUG) 457 baij->ht_total_ct = total_ct; 458 baij->ht_insert_ct = insert_ct; 459 #endif 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 466 { 467 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 468 PetscTruth roworiented = baij->roworiented; 469 PetscErrorCode ierr; 470 PetscInt i,j,ii,jj,row,col; 471 PetscInt rstart=baij->rstartbs; 472 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 473 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 474 PetscReal tmp; 475 MatScalar **HD = baij->hd,*baij_a; 476 const PetscScalar *v_t,*value; 477 #if defined(PETSC_USE_DEBUG) 478 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 479 #endif 480 481 PetscFunctionBegin; 482 483 if (roworiented) { 484 stepval = (n-1)*bs; 485 } else { 486 stepval = (m-1)*bs; 487 } 488 for (i=0; i<m; i++) { 489 #if defined(PETSC_USE_DEBUG) 490 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 491 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 492 #endif 493 row = im[i]; 494 v_t = v + i*nbs2; 495 if (row >= rstart && row < rend) { 496 for (j=0; j<n; j++) { 497 col = in[j]; 498 499 /* Look up into the Hash Table */ 500 key = row*Nbs+col+1; 501 h1 = HASH(size,key,tmp); 502 503 idx = h1; 504 #if defined(PETSC_USE_DEBUG) 505 total_ct++; 506 insert_ct++; 507 if (HT[idx] != key) { 508 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == size) { 510 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 511 if (idx == h1) { 512 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 513 } 514 } 515 } 516 #else 517 if (HT[idx] != key) { 518 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 519 if (idx == size) { 520 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 521 if (idx == h1) { 522 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 523 } 524 } 525 } 526 #endif 527 baij_a = HD[idx]; 528 if (roworiented) { 529 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 530 /* value = v + (i*(stepval+bs)+j)*bs; */ 531 value = v_t; 532 v_t += bs; 533 if (addv == ADD_VALUES) { 534 for (ii=0; ii<bs; ii++,value+=stepval) { 535 for (jj=ii; jj<bs2; jj+=bs) { 536 baij_a[jj] += *value++; 537 } 538 } 539 } else { 540 for (ii=0; ii<bs; ii++,value+=stepval) { 541 for (jj=ii; jj<bs2; jj+=bs) { 542 baij_a[jj] = *value++; 543 } 544 } 545 } 546 } else { 547 value = v + j*(stepval+bs)*bs + i*bs; 548 if (addv == ADD_VALUES) { 549 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 550 for (jj=0; jj<bs; jj++) { 551 baij_a[jj] += *value++; 552 } 553 } 554 } else { 555 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 556 for (jj=0; jj<bs; jj++) { 557 baij_a[jj] = *value++; 558 } 559 } 560 } 561 } 562 } 563 } else { 564 if (!baij->donotstash) { 565 if (roworiented) { 566 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } else { 568 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 569 } 570 } 571 } 572 } 573 #if defined(PETSC_USE_DEBUG) 574 baij->ht_total_ct = total_ct; 575 baij->ht_insert_ct = insert_ct; 576 #endif 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatGetValues_MPIBAIJ" 582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 583 { 584 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 585 PetscErrorCode ierr; 586 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 587 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 588 589 PetscFunctionBegin; 590 for (i=0; i<m; i++) { 591 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 592 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 593 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 594 row = idxm[i] - bsrstart; 595 for (j=0; j<n; j++) { 596 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 597 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 598 if (idxn[j] >= bscstart && idxn[j] < bscend){ 599 col = idxn[j] - bscstart; 600 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 601 } else { 602 if (!baij->colmap) { 603 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 604 } 605 #if defined (PETSC_USE_CTABLE) 606 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 607 data --; 608 #else 609 data = baij->colmap[idxn[j]/bs]-1; 610 #endif 611 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 612 else { 613 col = data + idxn[j]%bs; 614 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 615 } 616 } 617 } 618 } else { 619 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 } 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatNorm_MPIBAIJ" 627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 628 { 629 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 630 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 633 PetscReal sum = 0.0; 634 MatScalar *v; 635 636 PetscFunctionBegin; 637 if (baij->size == 1) { 638 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 639 } else { 640 if (type == NORM_FROBENIUS) { 641 v = amat->a; 642 nz = amat->nz*bs2; 643 for (i=0; i<nz; i++) { 644 #if defined(PETSC_USE_COMPLEX) 645 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 646 #else 647 sum += (*v)*(*v); v++; 648 #endif 649 } 650 v = bmat->a; 651 nz = bmat->nz*bs2; 652 for (i=0; i<nz; i++) { 653 #if defined(PETSC_USE_COMPLEX) 654 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 660 *nrm = sqrt(*nrm); 661 } else if (type == NORM_1) { /* max column sum */ 662 PetscReal *tmp,*tmp2; 663 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 664 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 665 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 666 v = amat->a; jj = amat->j; 667 for (i=0; i<amat->nz; i++) { 668 for (j=0; j<bs; j++){ 669 col = bs*(cstart + *jj) + j; /* column index */ 670 for (row=0; row<bs; row++){ 671 tmp[col] += PetscAbsScalar(*v); v++; 672 } 673 } 674 jj++; 675 } 676 v = bmat->a; jj = bmat->j; 677 for (i=0; i<bmat->nz; i++) { 678 for (j=0; j<bs; j++){ 679 col = bs*garray[*jj] + j; 680 for (row=0; row<bs; row++){ 681 tmp[col] += PetscAbsScalar(*v); v++; 682 } 683 } 684 jj++; 685 } 686 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 687 *nrm = 0.0; 688 for (j=0; j<mat->cmap->N; j++) { 689 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 690 } 691 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 692 } else if (type == NORM_INFINITY) { /* max row sum */ 693 PetscReal *sums; 694 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 695 sum = 0.0; 696 for (j=0; j<amat->mbs; j++) { 697 for (row=0; row<bs; row++) sums[row] = 0.0; 698 v = amat->a + bs2*amat->i[j]; 699 nz = amat->i[j+1]-amat->i[j]; 700 for (i=0; i<nz; i++) { 701 for (col=0; col<bs; col++){ 702 for (row=0; row<bs; row++){ 703 sums[row] += PetscAbsScalar(*v); v++; 704 } 705 } 706 } 707 v = bmat->a + bs2*bmat->i[j]; 708 nz = bmat->i[j+1]-bmat->i[j]; 709 for (i=0; i<nz; i++) { 710 for (col=0; col<bs; col++){ 711 for (row=0; row<bs; row++){ 712 sums[row] += PetscAbsScalar(*v); v++; 713 } 714 } 715 } 716 for (row=0; row<bs; row++){ 717 if (sums[row] > sum) sum = sums[row]; 718 } 719 } 720 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 721 ierr = PetscFree(sums);CHKERRQ(ierr); 722 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Creates the hash table, and sets the table 729 This table is created only once. 730 If new entried need to be added to the matrix 731 then the hash table has to be destroyed and 732 recreated. 733 */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 737 { 738 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 739 Mat A = baij->A,B=baij->B; 740 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 741 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 742 PetscErrorCode ierr; 743 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 744 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 745 PetscInt *HT,key; 746 MatScalar **HD; 747 PetscReal tmp; 748 #if defined(PETSC_USE_INFO) 749 PetscInt ct=0,max=0; 750 #endif 751 752 PetscFunctionBegin; 753 if (baij->ht) PetscFunctionReturn(0); 754 755 baij->ht_size = (PetscInt)(factor*nz); 756 ht_size = baij->ht_size; 757 758 /* Allocate Memory for Hash Table */ 759 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 760 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 761 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 762 HD = baij->hd; 763 HT = baij->ht; 764 765 /* Loop Over A */ 766 for (i=0; i<a->mbs; i++) { 767 for (j=ai[i]; j<ai[i+1]; j++) { 768 row = i+rstart; 769 col = aj[j]+cstart; 770 771 key = row*Nbs + col + 1; 772 h1 = HASH(ht_size,key,tmp); 773 for (k=0; k<ht_size; k++){ 774 if (!HT[(h1+k)%ht_size]) { 775 HT[(h1+k)%ht_size] = key; 776 HD[(h1+k)%ht_size] = a->a + j*bs2; 777 break; 778 #if defined(PETSC_USE_INFO) 779 } else { 780 ct++; 781 #endif 782 } 783 } 784 #if defined(PETSC_USE_INFO) 785 if (k> max) max = k; 786 #endif 787 } 788 } 789 /* Loop Over B */ 790 for (i=0; i<b->mbs; i++) { 791 for (j=bi[i]; j<bi[i+1]; j++) { 792 row = i+rstart; 793 col = garray[bj[j]]; 794 key = row*Nbs + col + 1; 795 h1 = HASH(ht_size,key,tmp); 796 for (k=0; k<ht_size; k++){ 797 if (!HT[(h1+k)%ht_size]) { 798 HT[(h1+k)%ht_size] = key; 799 HD[(h1+k)%ht_size] = b->a + j*bs2; 800 break; 801 #if defined(PETSC_USE_INFO) 802 } else { 803 ct++; 804 #endif 805 } 806 } 807 #if defined(PETSC_USE_INFO) 808 if (k> max) max = k; 809 #endif 810 } 811 } 812 813 /* Print Summary */ 814 #if defined(PETSC_USE_INFO) 815 for (i=0,j=0; i<ht_size; i++) { 816 if (HT[i]) {j++;} 817 } 818 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 819 #endif 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 826 { 827 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 828 PetscErrorCode ierr; 829 PetscInt nstash,reallocs; 830 InsertMode addv; 831 832 PetscFunctionBegin; 833 if (baij->donotstash || mat->nooffprocentries) { 834 PetscFunctionReturn(0); 835 } 836 837 /* make sure all processors are either in INSERTMODE or ADDMODE */ 838 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 839 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 840 mat->insertmode = addv; /* in case this processor had no cache */ 841 842 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 843 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 844 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 845 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 846 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 847 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 854 { 855 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 856 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 857 PetscErrorCode ierr; 858 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 859 PetscInt *row,*col; 860 PetscTruth r1,r2,r3,other_disassembled; 861 MatScalar *val; 862 InsertMode addv = mat->insertmode; 863 PetscMPIInt n; 864 865 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 866 PetscFunctionBegin; 867 if (!baij->donotstash && !mat->nooffprocentries) { 868 while (1) { 869 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 870 if (!flg) break; 871 872 for (i=0; i<n;) { 873 /* Now identify the consecutive vals belonging to the same row */ 874 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 875 if (j < n) ncols = j-i; 876 else ncols = n-i; 877 /* Now assemble all these values with a single function call */ 878 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 879 i = j; 880 } 881 } 882 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 883 /* Now process the block-stash. Since the values are stashed column-oriented, 884 set the roworiented flag to column oriented, and after MatSetValues() 885 restore the original flags */ 886 r1 = baij->roworiented; 887 r2 = a->roworiented; 888 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 889 baij->roworiented = PETSC_FALSE; 890 a->roworiented = PETSC_FALSE; 891 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 892 while (1) { 893 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 894 if (!flg) break; 895 896 for (i=0; i<n;) { 897 /* Now identify the consecutive vals belonging to the same row */ 898 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 899 if (j < n) ncols = j-i; 900 else ncols = n-i; 901 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 902 i = j; 903 } 904 } 905 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 906 baij->roworiented = r1; 907 a->roworiented = r2; 908 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 909 } 910 911 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 912 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 913 914 /* determine if any processor has disassembled, if so we must 915 also disassemble ourselfs, in order that we may reassemble. */ 916 /* 917 if nonzero structure of submatrix B cannot change then we know that 918 no processor disassembled thus we can skip this stuff 919 */ 920 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 921 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 922 if (mat->was_assembled && !other_disassembled) { 923 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 924 } 925 } 926 927 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 928 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 929 } 930 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 931 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 933 934 #if defined(PETSC_USE_INFO) 935 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 936 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 937 baij->ht_total_ct = 0; 938 baij->ht_insert_ct = 0; 939 } 940 #endif 941 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 942 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 943 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 944 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 945 } 946 947 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 948 baij->rowvalues = 0; 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 955 { 956 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 957 PetscErrorCode ierr; 958 PetscMPIInt size = baij->size,rank = baij->rank; 959 PetscInt bs = mat->rmap->bs; 960 PetscTruth iascii,isdraw; 961 PetscViewer sviewer; 962 PetscViewerFormat format; 963 964 PetscFunctionBegin; 965 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 966 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 967 if (iascii) { 968 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 969 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 970 MatInfo info; 971 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 972 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 973 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 974 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 975 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 976 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 977 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 978 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 979 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 980 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 981 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 982 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } else if (format == PETSC_VIEWER_ASCII_INFO) { 985 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 988 PetscFunctionReturn(0); 989 } 990 } 991 992 if (isdraw) { 993 PetscDraw draw; 994 PetscTruth isnull; 995 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 996 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 997 } 998 999 if (size == 1) { 1000 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1001 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1002 } else { 1003 /* assemble the entire matrix onto first processor. */ 1004 Mat A; 1005 Mat_SeqBAIJ *Aloc; 1006 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1007 MatScalar *a; 1008 1009 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1010 /* Perhaps this should be the type of mat? */ 1011 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1012 if (!rank) { 1013 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1014 } else { 1015 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1016 } 1017 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1018 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1019 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1020 1021 /* copy over the A part */ 1022 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1023 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1024 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1025 1026 for (i=0; i<mbs; i++) { 1027 rvals[0] = bs*(baij->rstartbs + i); 1028 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1029 for (j=ai[i]; j<ai[i+1]; j++) { 1030 col = (baij->cstartbs+aj[j])*bs; 1031 for (k=0; k<bs; k++) { 1032 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1033 col++; a += bs; 1034 } 1035 } 1036 } 1037 /* copy over the B part */ 1038 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1039 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1040 for (i=0; i<mbs; i++) { 1041 rvals[0] = bs*(baij->rstartbs + i); 1042 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1043 for (j=ai[i]; j<ai[i+1]; j++) { 1044 col = baij->garray[aj[j]]*bs; 1045 for (k=0; k<bs; k++) { 1046 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1047 col++; a += bs; 1048 } 1049 } 1050 } 1051 ierr = PetscFree(rvals);CHKERRQ(ierr); 1052 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1054 /* 1055 Everyone has to call to draw the matrix since the graphics waits are 1056 synchronized across all processors that share the PetscDraw object 1057 */ 1058 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1059 if (!rank) { 1060 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1061 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1062 } 1063 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1064 ierr = MatDestroy(A);CHKERRQ(ierr); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatView_MPIBAIJ" 1071 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1072 { 1073 PetscErrorCode ierr; 1074 PetscTruth iascii,isdraw,issocket,isbinary; 1075 1076 PetscFunctionBegin; 1077 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1078 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1079 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1080 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1081 if (iascii || isdraw || issocket || isbinary) { 1082 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1083 } else { 1084 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1091 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1092 { 1093 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 #if defined(PETSC_USE_LOG) 1098 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1099 #endif 1100 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1101 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1102 if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);} 1103 if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);} 1104 #if defined (PETSC_USE_CTABLE) 1105 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1106 #else 1107 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1108 #endif 1109 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1110 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1111 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1112 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1113 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1114 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1115 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1116 ierr = PetscFree(baij);CHKERRQ(ierr); 1117 1118 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1119 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1120 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1121 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatMult_MPIBAIJ" 1131 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1132 { 1133 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 PetscInt nt; 1136 1137 PetscFunctionBegin; 1138 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1139 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1140 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1141 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1142 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1144 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1151 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1152 { 1153 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1158 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1159 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1166 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1167 { 1168 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1169 PetscErrorCode ierr; 1170 PetscTruth merged; 1171 1172 PetscFunctionBegin; 1173 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1174 /* do nondiagonal part */ 1175 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1176 if (!merged) { 1177 /* send it on its way */ 1178 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1179 /* do local part */ 1180 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1181 /* receive remote parts: note this assumes the values are not actually */ 1182 /* inserted in yy until the next line */ 1183 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1184 } else { 1185 /* do local part */ 1186 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1187 /* send it on its way */ 1188 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1189 /* values actually were received in the Begin() but we need to call this nop */ 1190 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1191 } 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1197 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1198 { 1199 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 /* do nondiagonal part */ 1204 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1205 /* send it on its way */ 1206 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1207 /* do local part */ 1208 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1209 /* receive remote parts: note this assumes the values are not actually */ 1210 /* inserted in yy until the next line, which is true for my implementation*/ 1211 /* but is not perhaps always true. */ 1212 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* 1217 This only works correctly for square matrices where the subblock A->A is the 1218 diagonal block 1219 */ 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1222 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1223 { 1224 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1225 PetscErrorCode ierr; 1226 1227 PetscFunctionBegin; 1228 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1229 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatScale_MPIBAIJ" 1235 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1236 { 1237 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1238 PetscErrorCode ierr; 1239 1240 PetscFunctionBegin; 1241 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1242 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1248 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1249 { 1250 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1251 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1252 PetscErrorCode ierr; 1253 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1254 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1255 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1256 1257 PetscFunctionBegin; 1258 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1259 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1260 mat->getrowactive = PETSC_TRUE; 1261 1262 if (!mat->rowvalues && (idx || v)) { 1263 /* 1264 allocate enough space to hold information from the longest row. 1265 */ 1266 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1267 PetscInt max = 1,mbs = mat->mbs,tmp; 1268 for (i=0; i<mbs; i++) { 1269 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1270 if (max < tmp) { max = tmp; } 1271 } 1272 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1273 } 1274 lrow = row - brstart; 1275 1276 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1277 if (!v) {pvA = 0; pvB = 0;} 1278 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1279 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1280 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1281 nztot = nzA + nzB; 1282 1283 cmap = mat->garray; 1284 if (v || idx) { 1285 if (nztot) { 1286 /* Sort by increasing column numbers, assuming A and B already sorted */ 1287 PetscInt imark = -1; 1288 if (v) { 1289 *v = v_p = mat->rowvalues; 1290 for (i=0; i<nzB; i++) { 1291 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1292 else break; 1293 } 1294 imark = i; 1295 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1296 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1297 } 1298 if (idx) { 1299 *idx = idx_p = mat->rowindices; 1300 if (imark > -1) { 1301 for (i=0; i<imark; i++) { 1302 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1303 } 1304 } else { 1305 for (i=0; i<nzB; i++) { 1306 if (cmap[cworkB[i]/bs] < cstart) 1307 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1308 else break; 1309 } 1310 imark = i; 1311 } 1312 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1313 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1314 } 1315 } else { 1316 if (idx) *idx = 0; 1317 if (v) *v = 0; 1318 } 1319 } 1320 *nz = nztot; 1321 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1322 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1328 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1329 { 1330 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1331 1332 PetscFunctionBegin; 1333 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1334 baij->getrowactive = PETSC_FALSE; 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1340 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1341 { 1342 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1343 PetscErrorCode ierr; 1344 1345 PetscFunctionBegin; 1346 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1347 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1353 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1354 { 1355 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1356 Mat A = a->A,B = a->B; 1357 PetscErrorCode ierr; 1358 PetscReal isend[5],irecv[5]; 1359 1360 PetscFunctionBegin; 1361 info->block_size = (PetscReal)matin->rmap->bs; 1362 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1363 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1364 isend[3] = info->memory; isend[4] = info->mallocs; 1365 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1366 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1367 isend[3] += info->memory; isend[4] += info->mallocs; 1368 if (flag == MAT_LOCAL) { 1369 info->nz_used = isend[0]; 1370 info->nz_allocated = isend[1]; 1371 info->nz_unneeded = isend[2]; 1372 info->memory = isend[3]; 1373 info->mallocs = isend[4]; 1374 } else if (flag == MAT_GLOBAL_MAX) { 1375 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1376 info->nz_used = irecv[0]; 1377 info->nz_allocated = irecv[1]; 1378 info->nz_unneeded = irecv[2]; 1379 info->memory = irecv[3]; 1380 info->mallocs = irecv[4]; 1381 } else if (flag == MAT_GLOBAL_SUM) { 1382 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1383 info->nz_used = irecv[0]; 1384 info->nz_allocated = irecv[1]; 1385 info->nz_unneeded = irecv[2]; 1386 info->memory = irecv[3]; 1387 info->mallocs = irecv[4]; 1388 } else { 1389 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1390 } 1391 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1392 info->fill_ratio_needed = 0; 1393 info->factor_mallocs = 0; 1394 PetscFunctionReturn(0); 1395 } 1396 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1399 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1400 { 1401 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1402 PetscErrorCode ierr; 1403 1404 PetscFunctionBegin; 1405 switch (op) { 1406 case MAT_NEW_NONZERO_LOCATIONS: 1407 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1408 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1409 case MAT_KEEP_NONZERO_PATTERN: 1410 case MAT_NEW_NONZERO_LOCATION_ERR: 1411 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1412 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1413 break; 1414 case MAT_ROW_ORIENTED: 1415 a->roworiented = flg; 1416 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1417 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1418 break; 1419 case MAT_NEW_DIAGONALS: 1420 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1421 break; 1422 case MAT_IGNORE_OFF_PROC_ENTRIES: 1423 a->donotstash = flg; 1424 break; 1425 case MAT_USE_HASH_TABLE: 1426 a->ht_flag = flg; 1427 break; 1428 case MAT_SYMMETRIC: 1429 case MAT_STRUCTURALLY_SYMMETRIC: 1430 case MAT_HERMITIAN: 1431 case MAT_SYMMETRY_ETERNAL: 1432 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1433 break; 1434 default: 1435 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1436 } 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1442 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1443 { 1444 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1445 Mat_SeqBAIJ *Aloc; 1446 Mat B; 1447 PetscErrorCode ierr; 1448 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1449 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1450 MatScalar *a; 1451 1452 PetscFunctionBegin; 1453 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1454 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1455 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1456 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1457 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1458 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1459 } else { 1460 B = *matout; 1461 } 1462 1463 /* copy over the A part */ 1464 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1465 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1466 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1467 1468 for (i=0; i<mbs; i++) { 1469 rvals[0] = bs*(baij->rstartbs + i); 1470 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1471 for (j=ai[i]; j<ai[i+1]; j++) { 1472 col = (baij->cstartbs+aj[j])*bs; 1473 for (k=0; k<bs; k++) { 1474 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1475 col++; a += bs; 1476 } 1477 } 1478 } 1479 /* copy over the B part */ 1480 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1481 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1482 for (i=0; i<mbs; i++) { 1483 rvals[0] = bs*(baij->rstartbs + i); 1484 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1485 for (j=ai[i]; j<ai[i+1]; j++) { 1486 col = baij->garray[aj[j]]*bs; 1487 for (k=0; k<bs; k++) { 1488 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1489 col++; a += bs; 1490 } 1491 } 1492 } 1493 ierr = PetscFree(rvals);CHKERRQ(ierr); 1494 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1495 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1496 1497 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1498 *matout = B; 1499 } else { 1500 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1507 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1508 { 1509 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1510 Mat a = baij->A,b = baij->B; 1511 PetscErrorCode ierr; 1512 PetscInt s1,s2,s3; 1513 1514 PetscFunctionBegin; 1515 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1516 if (rr) { 1517 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1518 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1519 /* Overlap communication with computation. */ 1520 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 } 1522 if (ll) { 1523 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1524 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1525 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1526 } 1527 /* scale the diagonal block */ 1528 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1529 1530 if (rr) { 1531 /* Do a scatter end and then right scale the off-diagonal block */ 1532 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1533 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1534 } 1535 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1541 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1542 { 1543 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1544 PetscErrorCode ierr; 1545 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1546 PetscInt i,*owners = A->rmap->range; 1547 PetscInt *nprocs,j,idx,nsends,row; 1548 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1549 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1550 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1551 MPI_Comm comm = ((PetscObject)A)->comm; 1552 MPI_Request *send_waits,*recv_waits; 1553 MPI_Status recv_status,*send_status; 1554 #if defined(PETSC_DEBUG) 1555 PetscTruth found = PETSC_FALSE; 1556 #endif 1557 1558 PetscFunctionBegin; 1559 /* first count number of contributors to each processor */ 1560 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1561 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1562 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1563 j = 0; 1564 for (i=0; i<N; i++) { 1565 if (lastidx > (idx = rows[i])) j = 0; 1566 lastidx = idx; 1567 for (; j<size; j++) { 1568 if (idx >= owners[j] && idx < owners[j+1]) { 1569 nprocs[2*j]++; 1570 nprocs[2*j+1] = 1; 1571 owner[i] = j; 1572 #if defined(PETSC_DEBUG) 1573 found = PETSC_TRUE; 1574 #endif 1575 break; 1576 } 1577 } 1578 #if defined(PETSC_DEBUG) 1579 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1580 found = PETSC_FALSE; 1581 #endif 1582 } 1583 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1584 1585 /* inform other processors of number of messages and max length*/ 1586 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1587 1588 /* post receives: */ 1589 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1590 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1591 for (i=0; i<nrecvs; i++) { 1592 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1593 } 1594 1595 /* do sends: 1596 1) starts[i] gives the starting index in svalues for stuff going to 1597 the ith processor 1598 */ 1599 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1600 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1601 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1602 starts[0] = 0; 1603 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1604 for (i=0; i<N; i++) { 1605 svalues[starts[owner[i]]++] = rows[i]; 1606 } 1607 1608 starts[0] = 0; 1609 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1610 count = 0; 1611 for (i=0; i<size; i++) { 1612 if (nprocs[2*i+1]) { 1613 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1614 } 1615 } 1616 ierr = PetscFree(starts);CHKERRQ(ierr); 1617 1618 base = owners[rank]; 1619 1620 /* wait on receives */ 1621 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1622 count = nrecvs; 1623 slen = 0; 1624 while (count) { 1625 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1626 /* unpack receives into our local space */ 1627 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1628 source[imdex] = recv_status.MPI_SOURCE; 1629 lens[imdex] = n; 1630 slen += n; 1631 count--; 1632 } 1633 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1634 1635 /* move the data into the send scatter */ 1636 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1637 count = 0; 1638 for (i=0; i<nrecvs; i++) { 1639 values = rvalues + i*nmax; 1640 for (j=0; j<lens[i]; j++) { 1641 lrows[count++] = values[j] - base; 1642 } 1643 } 1644 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1645 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1646 ierr = PetscFree(owner);CHKERRQ(ierr); 1647 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1648 1649 /* actually zap the local rows */ 1650 /* 1651 Zero the required rows. If the "diagonal block" of the matrix 1652 is square and the user wishes to set the diagonal we use separate 1653 code so that MatSetValues() is not called for each diagonal allocating 1654 new memory, thus calling lots of mallocs and slowing things down. 1655 1656 */ 1657 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1658 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1659 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1660 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1661 } else if (diag != 0.0) { 1662 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1663 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1664 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1665 for (i=0; i<slen; i++) { 1666 row = lrows[i] + rstart_bs; 1667 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1668 } 1669 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 } else { 1672 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1673 } 1674 1675 ierr = PetscFree(lrows);CHKERRQ(ierr); 1676 1677 /* wait on sends */ 1678 if (nsends) { 1679 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1680 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1681 ierr = PetscFree(send_status);CHKERRQ(ierr); 1682 } 1683 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1684 ierr = PetscFree(svalues);CHKERRQ(ierr); 1685 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1691 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1692 { 1693 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatEqual_MPIBAIJ" 1705 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1706 { 1707 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1708 Mat a,b,c,d; 1709 PetscTruth flg; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 a = matA->A; b = matA->B; 1714 c = matB->A; d = matB->B; 1715 1716 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1717 if (flg) { 1718 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1719 } 1720 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatCopy_MPIBAIJ" 1726 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1727 { 1728 PetscErrorCode ierr; 1729 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1730 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1731 1732 PetscFunctionBegin; 1733 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1734 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1735 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1736 } else { 1737 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1738 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1739 } 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1745 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1746 { 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1756 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1757 { 1758 PetscErrorCode ierr; 1759 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1760 PetscBLASInt bnz,one=1; 1761 Mat_SeqBAIJ *x,*y; 1762 1763 PetscFunctionBegin; 1764 if (str == SAME_NONZERO_PATTERN) { 1765 PetscScalar alpha = a; 1766 x = (Mat_SeqBAIJ *)xx->A->data; 1767 y = (Mat_SeqBAIJ *)yy->A->data; 1768 bnz = PetscBLASIntCast(x->nz); 1769 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1770 x = (Mat_SeqBAIJ *)xx->B->data; 1771 y = (Mat_SeqBAIJ *)yy->B->data; 1772 bnz = PetscBLASIntCast(x->nz); 1773 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1774 } else { 1775 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1776 } 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1782 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1783 { 1784 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1785 PetscInt rbs,cbs; 1786 PetscErrorCode ierr; 1787 1788 PetscFunctionBegin; 1789 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1790 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1791 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1792 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1793 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1794 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1795 PetscFunctionReturn(0); 1796 } 1797 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1800 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1801 { 1802 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1803 PetscErrorCode ierr; 1804 1805 PetscFunctionBegin; 1806 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1807 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1813 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1814 { 1815 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1816 PetscErrorCode ierr; 1817 1818 PetscFunctionBegin; 1819 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1820 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1826 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1827 { 1828 PetscErrorCode ierr; 1829 IS iscol_local; 1830 PetscInt csize; 1831 1832 PetscFunctionBegin; 1833 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1834 if (call == MAT_REUSE_MATRIX) { 1835 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1836 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1837 } else { 1838 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1839 } 1840 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1841 if (call == MAT_INITIAL_MATRIX) { 1842 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1843 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1850 /* 1851 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1852 in local and then by concatenating the local matrices the end result. 1853 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1854 */ 1855 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1856 { 1857 PetscErrorCode ierr; 1858 PetscMPIInt rank,size; 1859 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1860 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1861 Mat *local,M,Mreuse; 1862 MatScalar *vwork,*aa; 1863 MPI_Comm comm = ((PetscObject)mat)->comm; 1864 Mat_SeqBAIJ *aij; 1865 1866 1867 PetscFunctionBegin; 1868 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1869 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1870 1871 if (call == MAT_REUSE_MATRIX) { 1872 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1873 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1874 local = &Mreuse; 1875 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1876 } else { 1877 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1878 Mreuse = *local; 1879 ierr = PetscFree(local);CHKERRQ(ierr); 1880 } 1881 1882 /* 1883 m - number of local rows 1884 n - number of columns (same on all processors) 1885 rstart - first row in new global matrix generated 1886 */ 1887 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1888 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1889 m = m/bs; 1890 n = n/bs; 1891 1892 if (call == MAT_INITIAL_MATRIX) { 1893 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1894 ii = aij->i; 1895 jj = aij->j; 1896 1897 /* 1898 Determine the number of non-zeros in the diagonal and off-diagonal 1899 portions of the matrix in order to do correct preallocation 1900 */ 1901 1902 /* first get start and end of "diagonal" columns */ 1903 if (csize == PETSC_DECIDE) { 1904 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1905 if (mglobal == n*bs) { /* square matrix */ 1906 nlocal = m; 1907 } else { 1908 nlocal = n/size + ((n % size) > rank); 1909 } 1910 } else { 1911 nlocal = csize/bs; 1912 } 1913 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1914 rstart = rend - nlocal; 1915 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1916 1917 /* next, compute all the lengths */ 1918 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1919 olens = dlens + m; 1920 for (i=0; i<m; i++) { 1921 jend = ii[i+1] - ii[i]; 1922 olen = 0; 1923 dlen = 0; 1924 for (j=0; j<jend; j++) { 1925 if (*jj < rstart || *jj >= rend) olen++; 1926 else dlen++; 1927 jj++; 1928 } 1929 olens[i] = olen; 1930 dlens[i] = dlen; 1931 } 1932 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1933 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1934 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1935 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1936 ierr = PetscFree(dlens);CHKERRQ(ierr); 1937 } else { 1938 PetscInt ml,nl; 1939 1940 M = *newmat; 1941 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1942 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1943 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1944 /* 1945 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1946 rather than the slower MatSetValues(). 1947 */ 1948 M->was_assembled = PETSC_TRUE; 1949 M->assembled = PETSC_FALSE; 1950 } 1951 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1952 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1953 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1954 ii = aij->i; 1955 jj = aij->j; 1956 aa = aij->a; 1957 for (i=0; i<m; i++) { 1958 row = rstart/bs + i; 1959 nz = ii[i+1] - ii[i]; 1960 cwork = jj; jj += nz; 1961 vwork = aa; aa += nz; 1962 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1963 } 1964 1965 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1966 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 *newmat = M; 1968 1969 /* save submatrix used in processor for next request */ 1970 if (call == MAT_INITIAL_MATRIX) { 1971 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1972 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1973 } 1974 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatPermute_MPIBAIJ" 1980 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1981 { 1982 MPI_Comm comm,pcomm; 1983 PetscInt first,local_size,nrows; 1984 const PetscInt *rows; 1985 PetscMPIInt size; 1986 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1987 PetscErrorCode ierr; 1988 1989 PetscFunctionBegin; 1990 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1991 /* make a collective version of 'rowp' */ 1992 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1993 if (pcomm==comm) { 1994 crowp = rowp; 1995 } else { 1996 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1997 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1998 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1999 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2000 } 2001 /* collect the global row permutation and invert it */ 2002 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2003 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2004 if (pcomm!=comm) { 2005 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2006 } 2007 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2008 /* get the local target indices */ 2009 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2010 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2011 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2012 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2013 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2014 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2015 /* the column permutation is so much easier; 2016 make a local version of 'colp' and invert it */ 2017 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2018 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2019 if (size==1) { 2020 lcolp = colp; 2021 } else { 2022 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2023 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2024 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2025 } 2026 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2027 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2028 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2029 if (size>1) { 2030 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2031 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2032 } 2033 /* now we just get the submatrix */ 2034 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2035 /* clean up */ 2036 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2037 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2043 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2044 { 2045 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2046 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2047 2048 PetscFunctionBegin; 2049 if (nghosts) { *nghosts = B->nbs;} 2050 if (ghosts) {*ghosts = baij->garray;} 2051 PetscFunctionReturn(0); 2052 } 2053 2054 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2058 /* 2059 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2060 */ 2061 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2062 { 2063 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2064 PetscErrorCode ierr; 2065 PetscMPIInt size,*ncolsonproc,*disp,nn; 2066 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2067 const PetscInt *is; 2068 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2069 PetscInt *rowhit,M,cstart,cend,colb; 2070 PetscInt *columnsforrow,l; 2071 IS *isa; 2072 PetscTruth done,flg; 2073 ISLocalToGlobalMapping map = mat->bmapping; 2074 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2075 2076 PetscFunctionBegin; 2077 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2078 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2079 2080 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2081 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2082 M = mat->rmap->n/bs; 2083 cstart = mat->cmap->rstart/bs; 2084 cend = mat->cmap->rend/bs; 2085 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2086 c->N = mat->cmap->N/bs; 2087 c->m = mat->rmap->n/bs; 2088 c->rstart = mat->rmap->rstart/bs; 2089 2090 c->ncolors = nis; 2091 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2092 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2093 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2094 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2095 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2096 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2097 2098 /* Allow access to data structures of local part of matrix */ 2099 if (!baij->colmap) { 2100 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2101 } 2102 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2103 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2104 2105 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2106 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2107 2108 for (i=0; i<nis; i++) { 2109 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2110 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2111 c->ncolumns[i] = n; 2112 if (n) { 2113 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2114 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2115 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2116 } else { 2117 c->columns[i] = 0; 2118 } 2119 2120 if (ctype == IS_COLORING_GLOBAL){ 2121 /* Determine the total (parallel) number of columns of this color */ 2122 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2123 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2124 2125 nn = PetscMPIIntCast(n); 2126 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2127 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2128 if (!nctot) { 2129 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2130 } 2131 2132 disp[0] = 0; 2133 for (j=1; j<size; j++) { 2134 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2135 } 2136 2137 /* Get complete list of columns for color on each processor */ 2138 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2139 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2140 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2141 } else if (ctype == IS_COLORING_GHOSTED){ 2142 /* Determine local number of columns of this color on this process, including ghost points */ 2143 nctot = n; 2144 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2145 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2146 } else { 2147 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2148 } 2149 2150 /* 2151 Mark all rows affect by these columns 2152 */ 2153 /* Temporary option to allow for debugging/testing */ 2154 flg = PETSC_FALSE; 2155 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2156 if (!flg) {/*-----------------------------------------------------------------------------*/ 2157 /* crude, fast version */ 2158 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2159 /* loop over columns*/ 2160 for (j=0; j<nctot; j++) { 2161 if (ctype == IS_COLORING_GHOSTED) { 2162 col = ltog[cols[j]]; 2163 } else { 2164 col = cols[j]; 2165 } 2166 if (col >= cstart && col < cend) { 2167 /* column is in diagonal block of matrix */ 2168 rows = A_cj + A_ci[col-cstart]; 2169 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2170 } else { 2171 #if defined (PETSC_USE_CTABLE) 2172 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2173 colb --; 2174 #else 2175 colb = baij->colmap[col] - 1; 2176 #endif 2177 if (colb == -1) { 2178 m = 0; 2179 } else { 2180 colb = colb/bs; 2181 rows = B_cj + B_ci[colb]; 2182 m = B_ci[colb+1] - B_ci[colb]; 2183 } 2184 } 2185 /* loop over columns marking them in rowhit */ 2186 for (k=0; k<m; k++) { 2187 rowhit[*rows++] = col + 1; 2188 } 2189 } 2190 2191 /* count the number of hits */ 2192 nrows = 0; 2193 for (j=0; j<M; j++) { 2194 if (rowhit[j]) nrows++; 2195 } 2196 c->nrows[i] = nrows; 2197 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2198 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2199 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2200 nrows = 0; 2201 for (j=0; j<M; j++) { 2202 if (rowhit[j]) { 2203 c->rows[i][nrows] = j; 2204 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2205 nrows++; 2206 } 2207 } 2208 } else {/*-------------------------------------------------------------------------------*/ 2209 /* slow version, using rowhit as a linked list */ 2210 PetscInt currentcol,fm,mfm; 2211 rowhit[M] = M; 2212 nrows = 0; 2213 /* loop over columns*/ 2214 for (j=0; j<nctot; j++) { 2215 if (ctype == IS_COLORING_GHOSTED) { 2216 col = ltog[cols[j]]; 2217 } else { 2218 col = cols[j]; 2219 } 2220 if (col >= cstart && col < cend) { 2221 /* column is in diagonal block of matrix */ 2222 rows = A_cj + A_ci[col-cstart]; 2223 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2224 } else { 2225 #if defined (PETSC_USE_CTABLE) 2226 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2227 colb --; 2228 #else 2229 colb = baij->colmap[col] - 1; 2230 #endif 2231 if (colb == -1) { 2232 m = 0; 2233 } else { 2234 colb = colb/bs; 2235 rows = B_cj + B_ci[colb]; 2236 m = B_ci[colb+1] - B_ci[colb]; 2237 } 2238 } 2239 2240 /* loop over columns marking them in rowhit */ 2241 fm = M; /* fm points to first entry in linked list */ 2242 for (k=0; k<m; k++) { 2243 currentcol = *rows++; 2244 /* is it already in the list? */ 2245 do { 2246 mfm = fm; 2247 fm = rowhit[fm]; 2248 } while (fm < currentcol); 2249 /* not in list so add it */ 2250 if (fm != currentcol) { 2251 nrows++; 2252 columnsforrow[currentcol] = col; 2253 /* next three lines insert new entry into linked list */ 2254 rowhit[mfm] = currentcol; 2255 rowhit[currentcol] = fm; 2256 fm = currentcol; 2257 /* fm points to present position in list since we know the columns are sorted */ 2258 } else { 2259 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2260 } 2261 } 2262 } 2263 c->nrows[i] = nrows; 2264 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2265 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2266 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2267 /* now store the linked list of rows into c->rows[i] */ 2268 nrows = 0; 2269 fm = rowhit[M]; 2270 do { 2271 c->rows[i][nrows] = fm; 2272 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2273 fm = rowhit[fm]; 2274 } while (fm < M); 2275 } /* ---------------------------------------------------------------------------------------*/ 2276 ierr = PetscFree(cols);CHKERRQ(ierr); 2277 } 2278 2279 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2280 /* 2281 vscale will contain the "diagonal" on processor scalings followed by the off processor 2282 */ 2283 if (ctype == IS_COLORING_GLOBAL) { 2284 PetscInt *garray; 2285 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2286 for (i=0; i<baij->B->cmap->n/bs; i++) { 2287 for (j=0; j<bs; j++) { 2288 garray[i*bs+j] = bs*baij->garray[i]+j; 2289 } 2290 } 2291 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2292 ierr = PetscFree(garray);CHKERRQ(ierr); 2293 CHKMEMQ; 2294 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2295 for (k=0; k<c->ncolors; k++) { 2296 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2297 for (l=0; l<c->nrows[k]; l++) { 2298 col = c->columnsforrow[k][l]; 2299 if (col >= cstart && col < cend) { 2300 /* column is in diagonal block of matrix */ 2301 colb = col - cstart; 2302 } else { 2303 /* column is in "off-processor" part */ 2304 #if defined (PETSC_USE_CTABLE) 2305 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2306 colb --; 2307 #else 2308 colb = baij->colmap[col] - 1; 2309 #endif 2310 colb = colb/bs; 2311 colb += cend - cstart; 2312 } 2313 c->vscaleforrow[k][l] = colb; 2314 } 2315 } 2316 } else if (ctype == IS_COLORING_GHOSTED) { 2317 /* Get gtol mapping */ 2318 PetscInt N = mat->cmap->N, *gtol; 2319 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2320 for (i=0; i<N; i++) gtol[i] = -1; 2321 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2322 2323 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2324 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2325 for (k=0; k<c->ncolors; k++) { 2326 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2327 for (l=0; l<c->nrows[k]; l++) { 2328 col = c->columnsforrow[k][l]; /* global column index */ 2329 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2330 } 2331 } 2332 ierr = PetscFree(gtol);CHKERRQ(ierr); 2333 } 2334 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2335 2336 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2337 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2338 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2339 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2340 CHKMEMQ; 2341 PetscFunctionReturn(0); 2342 } 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2346 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2347 { 2348 Mat B; 2349 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2350 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2351 Mat_SeqAIJ *b; 2352 PetscErrorCode ierr; 2353 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2354 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2355 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2356 2357 PetscFunctionBegin; 2358 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2359 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2360 2361 /* ---------------------------------------------------------------- 2362 Tell every processor the number of nonzeros per row 2363 */ 2364 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2365 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2366 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2367 } 2368 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2369 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2370 displs = recvcounts + size; 2371 for (i=0; i<size; i++) { 2372 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2373 displs[i] = A->rmap->range[i]/bs; 2374 } 2375 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2376 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2377 #else 2378 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2379 #endif 2380 /* --------------------------------------------------------------- 2381 Create the sequential matrix of the same type as the local block diagonal 2382 */ 2383 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2384 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2385 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2386 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2387 b = (Mat_SeqAIJ *)B->data; 2388 2389 /*-------------------------------------------------------------------- 2390 Copy my part of matrix column indices over 2391 */ 2392 sendcount = ad->nz + bd->nz; 2393 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2394 a_jsendbuf = ad->j; 2395 b_jsendbuf = bd->j; 2396 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2397 cnt = 0; 2398 for (i=0; i<n; i++) { 2399 2400 /* put in lower diagonal portion */ 2401 m = bd->i[i+1] - bd->i[i]; 2402 while (m > 0) { 2403 /* is it above diagonal (in bd (compressed) numbering) */ 2404 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2405 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2406 m--; 2407 } 2408 2409 /* put in diagonal portion */ 2410 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2411 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2412 } 2413 2414 /* put in upper diagonal portion */ 2415 while (m-- > 0) { 2416 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2417 } 2418 } 2419 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2420 2421 /*-------------------------------------------------------------------- 2422 Gather all column indices to all processors 2423 */ 2424 for (i=0; i<size; i++) { 2425 recvcounts[i] = 0; 2426 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2427 recvcounts[i] += lens[j]; 2428 } 2429 } 2430 displs[0] = 0; 2431 for (i=1; i<size; i++) { 2432 displs[i] = displs[i-1] + recvcounts[i-1]; 2433 } 2434 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2435 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2436 #else 2437 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2438 #endif 2439 /*-------------------------------------------------------------------- 2440 Assemble the matrix into useable form (note numerical values not yet set) 2441 */ 2442 /* set the b->ilen (length of each row) values */ 2443 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2444 /* set the b->i indices */ 2445 b->i[0] = 0; 2446 for (i=1; i<=A->rmap->N/bs; i++) { 2447 b->i[i] = b->i[i-1] + lens[i-1]; 2448 } 2449 ierr = PetscFree(lens);CHKERRQ(ierr); 2450 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2451 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2452 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2453 2454 if (A->symmetric){ 2455 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2456 } else if (A->hermitian) { 2457 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2458 } else if (A->structurally_symmetric) { 2459 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2460 } 2461 *newmat = B; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSOR_MPIBAIJ" 2467 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2468 { 2469 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2470 PetscErrorCode ierr; 2471 Vec bb1 = 0; 2472 2473 PetscFunctionBegin; 2474 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2475 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2476 } 2477 2478 if (flag == SOR_APPLY_UPPER) { 2479 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2484 if (flag & SOR_ZERO_INITIAL_GUESS) { 2485 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2486 its--; 2487 } 2488 2489 while (its--) { 2490 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2491 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2492 2493 /* update rhs: bb1 = bb - B*x */ 2494 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2495 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2496 2497 /* local sweep */ 2498 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2499 } 2500 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2501 if (flag & SOR_ZERO_INITIAL_GUESS) { 2502 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2503 its--; 2504 } 2505 while (its--) { 2506 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2508 2509 /* update rhs: bb1 = bb - B*x */ 2510 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2511 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2512 2513 /* local sweep */ 2514 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2515 } 2516 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2517 if (flag & SOR_ZERO_INITIAL_GUESS) { 2518 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2519 its--; 2520 } 2521 while (its--) { 2522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2523 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2524 2525 /* update rhs: bb1 = bb - B*x */ 2526 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2527 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2528 2529 /* local sweep */ 2530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2531 } 2532 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2533 2534 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2535 PetscFunctionReturn(0); 2536 } 2537 2538 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2539 2540 2541 /* -------------------------------------------------------------------*/ 2542 static struct _MatOps MatOps_Values = { 2543 MatSetValues_MPIBAIJ, 2544 MatGetRow_MPIBAIJ, 2545 MatRestoreRow_MPIBAIJ, 2546 MatMult_MPIBAIJ, 2547 /* 4*/ MatMultAdd_MPIBAIJ, 2548 MatMultTranspose_MPIBAIJ, 2549 MatMultTransposeAdd_MPIBAIJ, 2550 0, 2551 0, 2552 0, 2553 /*10*/ 0, 2554 0, 2555 0, 2556 MatSOR_MPIBAIJ, 2557 MatTranspose_MPIBAIJ, 2558 /*15*/ MatGetInfo_MPIBAIJ, 2559 MatEqual_MPIBAIJ, 2560 MatGetDiagonal_MPIBAIJ, 2561 MatDiagonalScale_MPIBAIJ, 2562 MatNorm_MPIBAIJ, 2563 /*20*/ MatAssemblyBegin_MPIBAIJ, 2564 MatAssemblyEnd_MPIBAIJ, 2565 MatSetOption_MPIBAIJ, 2566 MatZeroEntries_MPIBAIJ, 2567 /*24*/ MatZeroRows_MPIBAIJ, 2568 0, 2569 0, 2570 0, 2571 0, 2572 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2573 0, 2574 0, 2575 0, 2576 0, 2577 /*34*/ MatDuplicate_MPIBAIJ, 2578 0, 2579 0, 2580 0, 2581 0, 2582 /*39*/ MatAXPY_MPIBAIJ, 2583 MatGetSubMatrices_MPIBAIJ, 2584 MatIncreaseOverlap_MPIBAIJ, 2585 MatGetValues_MPIBAIJ, 2586 MatCopy_MPIBAIJ, 2587 /*44*/ 0, 2588 MatScale_MPIBAIJ, 2589 0, 2590 0, 2591 0, 2592 /*49*/ MatSetBlockSize_MPIBAIJ, 2593 0, 2594 0, 2595 0, 2596 0, 2597 /*54*/ MatFDColoringCreate_MPIBAIJ, 2598 0, 2599 MatSetUnfactored_MPIBAIJ, 2600 MatPermute_MPIBAIJ, 2601 MatSetValuesBlocked_MPIBAIJ, 2602 /*59*/ MatGetSubMatrix_MPIBAIJ, 2603 MatDestroy_MPIBAIJ, 2604 MatView_MPIBAIJ, 2605 0, 2606 0, 2607 /*64*/ 0, 2608 0, 2609 0, 2610 0, 2611 0, 2612 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*74*/ 0, 2618 MatFDColoringApply_BAIJ, 2619 0, 2620 0, 2621 0, 2622 /*79*/ 0, 2623 0, 2624 0, 2625 0, 2626 MatLoad_MPIBAIJ, 2627 /*84*/ 0, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /*89*/ 0, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /*94*/ 0, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /*99*/ 0, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /*104*/0, 2648 MatRealPart_MPIBAIJ, 2649 MatImaginaryPart_MPIBAIJ, 2650 0, 2651 0, 2652 /*109*/0, 2653 0, 2654 0, 2655 0, 2656 0, 2657 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2658 0, 2659 MatGetGhosts_MPIBAIJ, 2660 0, 2661 0, 2662 /*119*/0, 2663 0, 2664 0, 2665 0 2666 }; 2667 2668 EXTERN_C_BEGIN 2669 #undef __FUNCT__ 2670 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2671 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2672 { 2673 PetscFunctionBegin; 2674 *a = ((Mat_MPIBAIJ *)A->data)->A; 2675 *iscopy = PETSC_FALSE; 2676 PetscFunctionReturn(0); 2677 } 2678 EXTERN_C_END 2679 2680 EXTERN_C_BEGIN 2681 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2682 EXTERN_C_END 2683 2684 EXTERN_C_BEGIN 2685 #undef __FUNCT__ 2686 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2687 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2688 { 2689 PetscInt m,rstart,cstart,cend; 2690 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2691 const PetscInt *JJ=0; 2692 PetscScalar *values=0; 2693 PetscErrorCode ierr; 2694 2695 PetscFunctionBegin; 2696 2697 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2698 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2699 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2700 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2701 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2702 m = B->rmap->n/bs; 2703 rstart = B->rmap->rstart/bs; 2704 cstart = B->cmap->rstart/bs; 2705 cend = B->cmap->rend/bs; 2706 2707 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2708 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2709 for (i=0; i<m; i++) { 2710 nz = ii[i+1] - ii[i]; 2711 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2712 nz_max = PetscMax(nz_max,nz); 2713 JJ = jj + ii[i]; 2714 for (j=0; j<nz; j++) { 2715 if (*JJ >= cstart) break; 2716 JJ++; 2717 } 2718 d = 0; 2719 for (; j<nz; j++) { 2720 if (*JJ++ >= cend) break; 2721 d++; 2722 } 2723 d_nnz[i] = d; 2724 o_nnz[i] = nz - d; 2725 } 2726 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2727 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2728 2729 values = (PetscScalar*)V; 2730 if (!values) { 2731 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2732 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2733 } 2734 for (i=0; i<m; i++) { 2735 PetscInt row = i + rstart; 2736 PetscInt ncols = ii[i+1] - ii[i]; 2737 const PetscInt *icols = jj + ii[i]; 2738 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2739 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2740 } 2741 2742 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2743 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2744 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2745 2746 PetscFunctionReturn(0); 2747 } 2748 EXTERN_C_END 2749 2750 #undef __FUNCT__ 2751 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2752 /*@C 2753 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2754 (the default parallel PETSc format). 2755 2756 Collective on MPI_Comm 2757 2758 Input Parameters: 2759 + A - the matrix 2760 . i - the indices into j for the start of each local row (starts with zero) 2761 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2762 - v - optional values in the matrix 2763 2764 Level: developer 2765 2766 .keywords: matrix, aij, compressed row, sparse, parallel 2767 2768 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2769 @*/ 2770 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2771 { 2772 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2773 2774 PetscFunctionBegin; 2775 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2776 if (f) { 2777 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2778 } 2779 PetscFunctionReturn(0); 2780 } 2781 2782 EXTERN_C_BEGIN 2783 #undef __FUNCT__ 2784 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2785 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2786 { 2787 Mat_MPIBAIJ *b; 2788 PetscErrorCode ierr; 2789 PetscInt i, newbs = PetscAbs(bs); 2790 2791 PetscFunctionBegin; 2792 if (bs < 0) { 2793 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2794 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2795 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2796 bs = PetscAbs(bs); 2797 } 2798 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2799 bs = newbs; 2800 2801 2802 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2803 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2804 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2805 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2806 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2807 2808 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2809 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2810 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2811 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2812 2813 if (d_nnz) { 2814 for (i=0; i<B->rmap->n/bs; i++) { 2815 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2816 } 2817 } 2818 if (o_nnz) { 2819 for (i=0; i<B->rmap->n/bs; i++) { 2820 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2821 } 2822 } 2823 2824 b = (Mat_MPIBAIJ*)B->data; 2825 b->bs2 = bs*bs; 2826 b->mbs = B->rmap->n/bs; 2827 b->nbs = B->cmap->n/bs; 2828 b->Mbs = B->rmap->N/bs; 2829 b->Nbs = B->cmap->N/bs; 2830 2831 for (i=0; i<=b->size; i++) { 2832 b->rangebs[i] = B->rmap->range[i]/bs; 2833 } 2834 b->rstartbs = B->rmap->rstart/bs; 2835 b->rendbs = B->rmap->rend/bs; 2836 b->cstartbs = B->cmap->rstart/bs; 2837 b->cendbs = B->cmap->rend/bs; 2838 2839 if (!B->preallocated) { 2840 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2841 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2842 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2844 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2845 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2846 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2847 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2848 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2849 } 2850 2851 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2852 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2853 B->preallocated = PETSC_TRUE; 2854 PetscFunctionReturn(0); 2855 } 2856 EXTERN_C_END 2857 2858 EXTERN_C_BEGIN 2859 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2860 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2861 EXTERN_C_END 2862 2863 2864 EXTERN_C_BEGIN 2865 #undef __FUNCT__ 2866 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2867 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2868 { 2869 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2870 PetscErrorCode ierr; 2871 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2872 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2873 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2874 2875 PetscFunctionBegin; 2876 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2877 ii[0] = 0; 2878 CHKMEMQ; 2879 for (i=0; i<M; i++) { 2880 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2881 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2882 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2883 /* remove one from count of matrix has diagonal */ 2884 for (j=id[i]; j<id[i+1]; j++) { 2885 if (jd[j] == i) {ii[i+1]--;break;} 2886 } 2887 CHKMEMQ; 2888 } 2889 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2890 cnt = 0; 2891 for (i=0; i<M; i++) { 2892 for (j=io[i]; j<io[i+1]; j++) { 2893 if (garray[jo[j]] > rstart) break; 2894 jj[cnt++] = garray[jo[j]]; 2895 CHKMEMQ; 2896 } 2897 for (k=id[i]; k<id[i+1]; k++) { 2898 if (jd[k] != i) { 2899 jj[cnt++] = rstart + jd[k]; 2900 CHKMEMQ; 2901 } 2902 } 2903 for (;j<io[i+1]; j++) { 2904 jj[cnt++] = garray[jo[j]]; 2905 CHKMEMQ; 2906 } 2907 } 2908 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2909 PetscFunctionReturn(0); 2910 } 2911 EXTERN_C_END 2912 2913 EXTERN_C_BEGIN 2914 #if defined(PETSC_HAVE_MUMPS) 2915 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2916 #endif 2917 EXTERN_C_END 2918 2919 /*MC 2920 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2921 2922 Options Database Keys: 2923 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2924 . -mat_block_size <bs> - set the blocksize used to store the matrix 2925 - -mat_use_hash_table <fact> 2926 2927 Level: beginner 2928 2929 .seealso: MatCreateMPIBAIJ 2930 M*/ 2931 2932 EXTERN_C_BEGIN 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatCreate_MPIBAIJ" 2935 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2936 { 2937 Mat_MPIBAIJ *b; 2938 PetscErrorCode ierr; 2939 PetscTruth flg; 2940 2941 PetscFunctionBegin; 2942 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2943 B->data = (void*)b; 2944 2945 2946 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2947 B->mapping = 0; 2948 B->assembled = PETSC_FALSE; 2949 2950 B->insertmode = NOT_SET_VALUES; 2951 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2952 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2953 2954 /* build local table of row and column ownerships */ 2955 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2956 2957 /* build cache for off array entries formed */ 2958 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2959 b->donotstash = PETSC_FALSE; 2960 b->colmap = PETSC_NULL; 2961 b->garray = PETSC_NULL; 2962 b->roworiented = PETSC_TRUE; 2963 2964 /* stuff used in block assembly */ 2965 b->barray = 0; 2966 2967 /* stuff used for matrix vector multiply */ 2968 b->lvec = 0; 2969 b->Mvctx = 0; 2970 2971 /* stuff for MatGetRow() */ 2972 b->rowindices = 0; 2973 b->rowvalues = 0; 2974 b->getrowactive = PETSC_FALSE; 2975 2976 /* hash table stuff */ 2977 b->ht = 0; 2978 b->hd = 0; 2979 b->ht_size = 0; 2980 b->ht_flag = PETSC_FALSE; 2981 b->ht_fact = 0; 2982 b->ht_total_ct = 0; 2983 b->ht_insert_ct = 0; 2984 2985 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2986 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2987 if (flg) { 2988 PetscReal fact = 1.39; 2989 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2990 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2991 if (fact <= 1.0) fact = 1.39; 2992 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2993 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2994 } 2995 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2996 2997 #if defined(PETSC_HAVE_MUMPS) 2998 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 2999 #endif 3000 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3001 "MatConvert_MPIBAIJ_MPIAdj", 3002 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3003 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3004 "MatStoreValues_MPIBAIJ", 3005 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3006 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3007 "MatRetrieveValues_MPIBAIJ", 3008 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3009 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3010 "MatGetDiagonalBlock_MPIBAIJ", 3011 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3013 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3014 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3015 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3016 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3017 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3018 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3019 "MatDiagonalScaleLocal_MPIBAIJ", 3020 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3021 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3022 "MatSetHashTableFactor_MPIBAIJ", 3023 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3024 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 EXTERN_C_END 3028 3029 /*MC 3030 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3031 3032 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3033 and MATMPIBAIJ otherwise. 3034 3035 Options Database Keys: 3036 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3037 3038 Level: beginner 3039 3040 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3041 M*/ 3042 3043 EXTERN_C_BEGIN 3044 #undef __FUNCT__ 3045 #define __FUNCT__ "MatCreate_BAIJ" 3046 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3047 { 3048 PetscErrorCode ierr; 3049 PetscMPIInt size; 3050 3051 PetscFunctionBegin; 3052 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3053 if (size == 1) { 3054 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3055 } else { 3056 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3057 } 3058 PetscFunctionReturn(0); 3059 } 3060 EXTERN_C_END 3061 3062 #undef __FUNCT__ 3063 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3064 /*@C 3065 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3066 (block compressed row). For good matrix assembly performance 3067 the user should preallocate the matrix storage by setting the parameters 3068 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3069 performance can be increased by more than a factor of 50. 3070 3071 Collective on Mat 3072 3073 Input Parameters: 3074 + A - the matrix 3075 . bs - size of blockk 3076 . d_nz - number of block nonzeros per block row in diagonal portion of local 3077 submatrix (same for all local rows) 3078 . d_nnz - array containing the number of block nonzeros in the various block rows 3079 of the in diagonal portion of the local (possibly different for each block 3080 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3081 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3082 submatrix (same for all local rows). 3083 - o_nnz - array containing the number of nonzeros in the various block rows of the 3084 off-diagonal portion of the local submatrix (possibly different for 3085 each block row) or PETSC_NULL. 3086 3087 If the *_nnz parameter is given then the *_nz parameter is ignored 3088 3089 Options Database Keys: 3090 + -mat_block_size - size of the blocks to use 3091 - -mat_use_hash_table <fact> 3092 3093 Notes: 3094 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3095 than it must be used on all processors that share the object for that argument. 3096 3097 Storage Information: 3098 For a square global matrix we define each processor's diagonal portion 3099 to be its local rows and the corresponding columns (a square submatrix); 3100 each processor's off-diagonal portion encompasses the remainder of the 3101 local matrix (a rectangular submatrix). 3102 3103 The user can specify preallocated storage for the diagonal part of 3104 the local submatrix with either d_nz or d_nnz (not both). Set 3105 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3106 memory allocation. Likewise, specify preallocated storage for the 3107 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3108 3109 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3110 the figure below we depict these three local rows and all columns (0-11). 3111 3112 .vb 3113 0 1 2 3 4 5 6 7 8 9 10 11 3114 ------------------- 3115 row 3 | o o o d d d o o o o o o 3116 row 4 | o o o d d d o o o o o o 3117 row 5 | o o o d d d o o o o o o 3118 ------------------- 3119 .ve 3120 3121 Thus, any entries in the d locations are stored in the d (diagonal) 3122 submatrix, and any entries in the o locations are stored in the 3123 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3124 stored simply in the MATSEQBAIJ format for compressed row storage. 3125 3126 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3127 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3128 In general, for PDE problems in which most nonzeros are near the diagonal, 3129 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3130 or you will get TERRIBLE performance; see the users' manual chapter on 3131 matrices. 3132 3133 You can call MatGetInfo() to get information on how effective the preallocation was; 3134 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3135 You can also run with the option -info and look for messages with the string 3136 malloc in them to see if additional memory allocation was needed. 3137 3138 Level: intermediate 3139 3140 .keywords: matrix, block, aij, compressed row, sparse, parallel 3141 3142 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3143 @*/ 3144 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3145 { 3146 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3147 3148 PetscFunctionBegin; 3149 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3150 if (f) { 3151 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3152 } 3153 PetscFunctionReturn(0); 3154 } 3155 3156 #undef __FUNCT__ 3157 #define __FUNCT__ "MatCreateMPIBAIJ" 3158 /*@C 3159 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3160 (block compressed row). For good matrix assembly performance 3161 the user should preallocate the matrix storage by setting the parameters 3162 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3163 performance can be increased by more than a factor of 50. 3164 3165 Collective on MPI_Comm 3166 3167 Input Parameters: 3168 + comm - MPI communicator 3169 . bs - size of blockk 3170 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3171 This value should be the same as the local size used in creating the 3172 y vector for the matrix-vector product y = Ax. 3173 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3174 This value should be the same as the local size used in creating the 3175 x vector for the matrix-vector product y = Ax. 3176 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3177 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3178 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3179 submatrix (same for all local rows) 3180 . d_nnz - array containing the number of nonzero blocks in the various block rows 3181 of the in diagonal portion of the local (possibly different for each block 3182 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3183 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3184 submatrix (same for all local rows). 3185 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3186 off-diagonal portion of the local submatrix (possibly different for 3187 each block row) or PETSC_NULL. 3188 3189 Output Parameter: 3190 . A - the matrix 3191 3192 Options Database Keys: 3193 + -mat_block_size - size of the blocks to use 3194 - -mat_use_hash_table <fact> 3195 3196 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3197 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3198 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3199 3200 Notes: 3201 If the *_nnz parameter is given then the *_nz parameter is ignored 3202 3203 A nonzero block is any block that as 1 or more nonzeros in it 3204 3205 The user MUST specify either the local or global matrix dimensions 3206 (possibly both). 3207 3208 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3209 than it must be used on all processors that share the object for that argument. 3210 3211 Storage Information: 3212 For a square global matrix we define each processor's diagonal portion 3213 to be its local rows and the corresponding columns (a square submatrix); 3214 each processor's off-diagonal portion encompasses the remainder of the 3215 local matrix (a rectangular submatrix). 3216 3217 The user can specify preallocated storage for the diagonal part of 3218 the local submatrix with either d_nz or d_nnz (not both). Set 3219 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3220 memory allocation. Likewise, specify preallocated storage for the 3221 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3222 3223 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3224 the figure below we depict these three local rows and all columns (0-11). 3225 3226 .vb 3227 0 1 2 3 4 5 6 7 8 9 10 11 3228 ------------------- 3229 row 3 | o o o d d d o o o o o o 3230 row 4 | o o o d d d o o o o o o 3231 row 5 | o o o d d d o o o o o o 3232 ------------------- 3233 .ve 3234 3235 Thus, any entries in the d locations are stored in the d (diagonal) 3236 submatrix, and any entries in the o locations are stored in the 3237 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3238 stored simply in the MATSEQBAIJ format for compressed row storage. 3239 3240 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3241 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3242 In general, for PDE problems in which most nonzeros are near the diagonal, 3243 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3244 or you will get TERRIBLE performance; see the users' manual chapter on 3245 matrices. 3246 3247 Level: intermediate 3248 3249 .keywords: matrix, block, aij, compressed row, sparse, parallel 3250 3251 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3252 @*/ 3253 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3254 { 3255 PetscErrorCode ierr; 3256 PetscMPIInt size; 3257 3258 PetscFunctionBegin; 3259 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3260 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3261 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3262 if (size > 1) { 3263 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3264 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3265 } else { 3266 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3267 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3268 } 3269 PetscFunctionReturn(0); 3270 } 3271 3272 #undef __FUNCT__ 3273 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3274 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3275 { 3276 Mat mat; 3277 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3278 PetscErrorCode ierr; 3279 PetscInt len=0; 3280 3281 PetscFunctionBegin; 3282 *newmat = 0; 3283 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3284 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3285 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3286 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3287 3288 mat->factortype = matin->factortype; 3289 mat->preallocated = PETSC_TRUE; 3290 mat->assembled = PETSC_TRUE; 3291 mat->insertmode = NOT_SET_VALUES; 3292 3293 a = (Mat_MPIBAIJ*)mat->data; 3294 mat->rmap->bs = matin->rmap->bs; 3295 a->bs2 = oldmat->bs2; 3296 a->mbs = oldmat->mbs; 3297 a->nbs = oldmat->nbs; 3298 a->Mbs = oldmat->Mbs; 3299 a->Nbs = oldmat->Nbs; 3300 3301 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3302 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3303 3304 a->size = oldmat->size; 3305 a->rank = oldmat->rank; 3306 a->donotstash = oldmat->donotstash; 3307 a->roworiented = oldmat->roworiented; 3308 a->rowindices = 0; 3309 a->rowvalues = 0; 3310 a->getrowactive = PETSC_FALSE; 3311 a->barray = 0; 3312 a->rstartbs = oldmat->rstartbs; 3313 a->rendbs = oldmat->rendbs; 3314 a->cstartbs = oldmat->cstartbs; 3315 a->cendbs = oldmat->cendbs; 3316 3317 /* hash table stuff */ 3318 a->ht = 0; 3319 a->hd = 0; 3320 a->ht_size = 0; 3321 a->ht_flag = oldmat->ht_flag; 3322 a->ht_fact = oldmat->ht_fact; 3323 a->ht_total_ct = 0; 3324 a->ht_insert_ct = 0; 3325 3326 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3327 if (oldmat->colmap) { 3328 #if defined (PETSC_USE_CTABLE) 3329 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3330 #else 3331 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3332 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3333 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3334 #endif 3335 } else a->colmap = 0; 3336 3337 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3338 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3339 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3340 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3341 } else a->garray = 0; 3342 3343 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3344 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3345 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3346 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3347 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3348 3349 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3350 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3351 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3352 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3353 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3354 *newmat = mat; 3355 3356 PetscFunctionReturn(0); 3357 } 3358 3359 #undef __FUNCT__ 3360 #define __FUNCT__ "MatLoad_MPIBAIJ" 3361 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3362 { 3363 PetscErrorCode ierr; 3364 int fd; 3365 PetscInt i,nz,j,rstart,rend; 3366 PetscScalar *vals,*buf; 3367 MPI_Comm comm = ((PetscObject)viewer)->comm; 3368 MPI_Status status; 3369 PetscMPIInt rank,size,maxnz; 3370 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3371 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3372 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3373 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3374 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3375 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3376 3377 PetscFunctionBegin; 3378 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3379 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3380 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3381 3382 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3383 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3384 if (!rank) { 3385 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3386 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3387 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3388 } 3389 3390 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3391 3392 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3393 M = header[1]; N = header[2]; 3394 3395 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3396 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3397 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3398 3399 /* If global sizes are set, check if they are consistent with that given in the file */ 3400 if (sizesset) { 3401 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3402 } 3403 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3404 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3405 3406 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3407 3408 /* 3409 This code adds extra rows to make sure the number of rows is 3410 divisible by the blocksize 3411 */ 3412 Mbs = M/bs; 3413 extra_rows = bs - M + bs*Mbs; 3414 if (extra_rows == bs) extra_rows = 0; 3415 else Mbs++; 3416 if (extra_rows && !rank) { 3417 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3418 } 3419 3420 /* determine ownership of all rows */ 3421 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3422 mbs = Mbs/size + ((Mbs % size) > rank); 3423 m = mbs*bs; 3424 } else { /* User set */ 3425 m = newmat->rmap->n; 3426 mbs = m/bs; 3427 } 3428 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3429 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3430 3431 /* process 0 needs enough room for process with most rows */ 3432 if (!rank) { 3433 mmax = rowners[1]; 3434 for (i=2; i<size; i++) { 3435 mmax = PetscMax(mmax,rowners[i]); 3436 } 3437 mmax*=bs; 3438 } else mmax = m; 3439 3440 rowners[0] = 0; 3441 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3442 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3443 rstart = rowners[rank]; 3444 rend = rowners[rank+1]; 3445 3446 /* distribute row lengths to all processors */ 3447 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3448 if (!rank) { 3449 mend = m; 3450 if (size == 1) mend = mend - extra_rows; 3451 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3452 for (j=mend; j<m; j++) locrowlens[j] = 1; 3453 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3454 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3455 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3456 for (j=0; j<m; j++) { 3457 procsnz[0] += locrowlens[j]; 3458 } 3459 for (i=1; i<size; i++) { 3460 mend = browners[i+1] - browners[i]; 3461 if (i == size-1) mend = mend - extra_rows; 3462 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3463 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3464 /* calculate the number of nonzeros on each processor */ 3465 for (j=0; j<browners[i+1]-browners[i]; j++) { 3466 procsnz[i] += rowlengths[j]; 3467 } 3468 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3469 } 3470 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3471 } else { 3472 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3473 } 3474 3475 if (!rank) { 3476 /* determine max buffer needed and allocate it */ 3477 maxnz = procsnz[0]; 3478 for (i=1; i<size; i++) { 3479 maxnz = PetscMax(maxnz,procsnz[i]); 3480 } 3481 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3482 3483 /* read in my part of the matrix column indices */ 3484 nz = procsnz[0]; 3485 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3486 mycols = ibuf; 3487 if (size == 1) nz -= extra_rows; 3488 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3489 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3490 3491 /* read in every ones (except the last) and ship off */ 3492 for (i=1; i<size-1; i++) { 3493 nz = procsnz[i]; 3494 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3495 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3496 } 3497 /* read in the stuff for the last proc */ 3498 if (size != 1) { 3499 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3500 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3501 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3502 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3503 } 3504 ierr = PetscFree(cols);CHKERRQ(ierr); 3505 } else { 3506 /* determine buffer space needed for message */ 3507 nz = 0; 3508 for (i=0; i<m; i++) { 3509 nz += locrowlens[i]; 3510 } 3511 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3512 mycols = ibuf; 3513 /* receive message of column indices*/ 3514 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3515 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3516 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3517 } 3518 3519 /* loop over local rows, determining number of off diagonal entries */ 3520 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3521 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3522 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3523 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3524 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3525 rowcount = 0; nzcount = 0; 3526 for (i=0; i<mbs; i++) { 3527 dcount = 0; 3528 odcount = 0; 3529 for (j=0; j<bs; j++) { 3530 kmax = locrowlens[rowcount]; 3531 for (k=0; k<kmax; k++) { 3532 tmp = mycols[nzcount++]/bs; 3533 if (!mask[tmp]) { 3534 mask[tmp] = 1; 3535 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3536 else masked1[dcount++] = tmp; 3537 } 3538 } 3539 rowcount++; 3540 } 3541 3542 dlens[i] = dcount; 3543 odlens[i] = odcount; 3544 3545 /* zero out the mask elements we set */ 3546 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3547 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3548 } 3549 3550 3551 if (!sizesset) { 3552 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3553 } 3554 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3555 3556 if (!rank) { 3557 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3558 /* read in my part of the matrix numerical values */ 3559 nz = procsnz[0]; 3560 vals = buf; 3561 mycols = ibuf; 3562 if (size == 1) nz -= extra_rows; 3563 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3564 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3565 3566 /* insert into matrix */ 3567 jj = rstart*bs; 3568 for (i=0; i<m; i++) { 3569 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3570 mycols += locrowlens[i]; 3571 vals += locrowlens[i]; 3572 jj++; 3573 } 3574 /* read in other processors (except the last one) and ship out */ 3575 for (i=1; i<size-1; i++) { 3576 nz = procsnz[i]; 3577 vals = buf; 3578 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3579 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3580 } 3581 /* the last proc */ 3582 if (size != 1){ 3583 nz = procsnz[i] - extra_rows; 3584 vals = buf; 3585 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3586 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3587 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3588 } 3589 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3590 } else { 3591 /* receive numeric values */ 3592 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3593 3594 /* receive message of values*/ 3595 vals = buf; 3596 mycols = ibuf; 3597 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3598 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3599 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3600 3601 /* insert into matrix */ 3602 jj = rstart*bs; 3603 for (i=0; i<m; i++) { 3604 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3605 mycols += locrowlens[i]; 3606 vals += locrowlens[i]; 3607 jj++; 3608 } 3609 } 3610 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3611 ierr = PetscFree(buf);CHKERRQ(ierr); 3612 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3613 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3614 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3615 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3616 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3617 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3618 3619 PetscFunctionReturn(0); 3620 } 3621 3622 #undef __FUNCT__ 3623 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3624 /*@ 3625 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3626 3627 Input Parameters: 3628 . mat - the matrix 3629 . fact - factor 3630 3631 Not Collective, each process can use a different factor 3632 3633 Level: advanced 3634 3635 Notes: 3636 This can also be set by the command line option: -mat_use_hash_table <fact> 3637 3638 .keywords: matrix, hashtable, factor, HT 3639 3640 .seealso: MatSetOption() 3641 @*/ 3642 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3643 { 3644 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3645 3646 PetscFunctionBegin; 3647 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3648 if (f) { 3649 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3650 } 3651 PetscFunctionReturn(0); 3652 } 3653 3654 EXTERN_C_BEGIN 3655 #undef __FUNCT__ 3656 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3657 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3658 { 3659 Mat_MPIBAIJ *baij; 3660 3661 PetscFunctionBegin; 3662 baij = (Mat_MPIBAIJ*)mat->data; 3663 baij->ht_fact = fact; 3664 PetscFunctionReturn(0); 3665 } 3666 EXTERN_C_END 3667 3668 #undef __FUNCT__ 3669 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3670 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3671 { 3672 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3673 PetscFunctionBegin; 3674 *Ad = a->A; 3675 *Ao = a->B; 3676 *colmap = a->garray; 3677 PetscFunctionReturn(0); 3678 } 3679 3680 /* 3681 Special version for direct calls from Fortran (to eliminate two function call overheads 3682 */ 3683 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3684 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3685 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3686 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3687 #endif 3688 3689 #undef __FUNCT__ 3690 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3691 /*@C 3692 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3693 3694 Collective on Mat 3695 3696 Input Parameters: 3697 + mat - the matrix 3698 . min - number of input rows 3699 . im - input rows 3700 . nin - number of input columns 3701 . in - input columns 3702 . v - numerical values input 3703 - addvin - INSERT_VALUES or ADD_VALUES 3704 3705 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3706 3707 Level: advanced 3708 3709 .seealso: MatSetValuesBlocked() 3710 @*/ 3711 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3712 { 3713 /* convert input arguments to C version */ 3714 Mat mat = *matin; 3715 PetscInt m = *min, n = *nin; 3716 InsertMode addv = *addvin; 3717 3718 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3719 const MatScalar *value; 3720 MatScalar *barray=baij->barray; 3721 PetscTruth roworiented = baij->roworiented; 3722 PetscErrorCode ierr; 3723 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3724 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3725 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3726 3727 PetscFunctionBegin; 3728 /* tasks normally handled by MatSetValuesBlocked() */ 3729 if (mat->insertmode == NOT_SET_VALUES) { 3730 mat->insertmode = addv; 3731 } 3732 #if defined(PETSC_USE_DEBUG) 3733 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3734 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3735 #endif 3736 if (mat->assembled) { 3737 mat->was_assembled = PETSC_TRUE; 3738 mat->assembled = PETSC_FALSE; 3739 } 3740 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3741 3742 3743 if(!barray) { 3744 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3745 baij->barray = barray; 3746 } 3747 3748 if (roworiented) { 3749 stepval = (n-1)*bs; 3750 } else { 3751 stepval = (m-1)*bs; 3752 } 3753 for (i=0; i<m; i++) { 3754 if (im[i] < 0) continue; 3755 #if defined(PETSC_USE_DEBUG) 3756 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3757 #endif 3758 if (im[i] >= rstart && im[i] < rend) { 3759 row = im[i] - rstart; 3760 for (j=0; j<n; j++) { 3761 /* If NumCol = 1 then a copy is not required */ 3762 if ((roworiented) && (n == 1)) { 3763 barray = (MatScalar*)v + i*bs2; 3764 } else if((!roworiented) && (m == 1)) { 3765 barray = (MatScalar*)v + j*bs2; 3766 } else { /* Here a copy is required */ 3767 if (roworiented) { 3768 value = v + i*(stepval+bs)*bs + j*bs; 3769 } else { 3770 value = v + j*(stepval+bs)*bs + i*bs; 3771 } 3772 for (ii=0; ii<bs; ii++,value+=stepval) { 3773 for (jj=0; jj<bs; jj++) { 3774 *barray++ = *value++; 3775 } 3776 } 3777 barray -=bs2; 3778 } 3779 3780 if (in[j] >= cstart && in[j] < cend){ 3781 col = in[j] - cstart; 3782 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3783 } 3784 else if (in[j] < 0) continue; 3785 #if defined(PETSC_USE_DEBUG) 3786 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3787 #endif 3788 else { 3789 if (mat->was_assembled) { 3790 if (!baij->colmap) { 3791 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3792 } 3793 3794 #if defined(PETSC_USE_DEBUG) 3795 #if defined (PETSC_USE_CTABLE) 3796 { PetscInt data; 3797 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3798 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3799 } 3800 #else 3801 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3802 #endif 3803 #endif 3804 #if defined (PETSC_USE_CTABLE) 3805 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3806 col = (col - 1)/bs; 3807 #else 3808 col = (baij->colmap[in[j]] - 1)/bs; 3809 #endif 3810 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3811 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3812 col = in[j]; 3813 } 3814 } 3815 else col = in[j]; 3816 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3817 } 3818 } 3819 } else { 3820 if (!baij->donotstash) { 3821 if (roworiented) { 3822 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3823 } else { 3824 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3825 } 3826 } 3827 } 3828 } 3829 3830 /* task normally handled by MatSetValuesBlocked() */ 3831 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3832 PetscFunctionReturn(0); 3833 } 3834