1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 extern PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 ierr = PetscFree(idxb);CHKERRQ(ierr); 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 111 { \ 112 \ 113 brow = row/bs; \ 114 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 115 rmax = aimax[brow]; nrow = ailen[brow]; \ 116 bcol = col/bs; \ 117 ridx = row % bs; cidx = col % bs; \ 118 low = 0; high = nrow; \ 119 while (high-low > 3) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > bcol) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > bcol) break; \ 126 if (rp[_i] == bcol) { \ 127 bap = ap + bs2*_i + bs*cidx + ridx; \ 128 if (addv == ADD_VALUES) *bap += value; \ 129 else *bap = value; \ 130 goto a_noinsert; \ 131 } \ 132 } \ 133 if (a->nonew == 1) goto a_noinsert; \ 134 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 136 N = nrow++ - 1; \ 137 /* shift up all the later entries in this row */ \ 138 for (ii=N; ii>=_i; ii--) { \ 139 rp[ii+1] = rp[ii]; \ 140 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141 } \ 142 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 143 rp[_i] = bcol; \ 144 ap[bs2*_i + bs*cidx + ridx] = value; \ 145 a_noinsert:; \ 146 ailen[brow] = nrow; \ 147 } 148 149 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 150 { \ 151 brow = row/bs; \ 152 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 153 rmax = bimax[brow]; nrow = bilen[brow]; \ 154 bcol = col/bs; \ 155 ridx = row % bs; cidx = col % bs; \ 156 low = 0; high = nrow; \ 157 while (high-low > 3) { \ 158 t = (low+high)/2; \ 159 if (rp[t] > bcol) high = t; \ 160 else low = t; \ 161 } \ 162 for (_i=low; _i<high; _i++) { \ 163 if (rp[_i] > bcol) break; \ 164 if (rp[_i] == bcol) { \ 165 bap = ap + bs2*_i + bs*cidx + ridx; \ 166 if (addv == ADD_VALUES) *bap += value; \ 167 else *bap = value; \ 168 goto b_noinsert; \ 169 } \ 170 } \ 171 if (b->nonew == 1) goto b_noinsert; \ 172 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 173 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 174 CHKMEMQ;\ 175 N = nrow++ - 1; \ 176 /* shift up all the later entries in this row */ \ 177 for (ii=N; ii>=_i; ii--) { \ 178 rp[ii+1] = rp[ii]; \ 179 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 180 } \ 181 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 182 rp[_i] = bcol; \ 183 ap[bs2*_i + bs*cidx + ridx] = value; \ 184 b_noinsert:; \ 185 bilen[brow] = nrow; \ 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatSetValues_MPIBAIJ" 190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 191 { 192 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 193 MatScalar value; 194 PetscBool roworiented = baij->roworiented; 195 PetscErrorCode ierr; 196 PetscInt i,j,row,col; 197 PetscInt rstart_orig=mat->rmap->rstart; 198 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 199 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 200 201 /* Some Variables required in the macro */ 202 Mat A = baij->A; 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 204 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 205 MatScalar *aa=a->a; 206 207 Mat B = baij->B; 208 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 209 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 210 MatScalar *ba=b->a; 211 212 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 213 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 214 MatScalar *ap,*bap; 215 216 PetscFunctionBegin; 217 if (v) PetscValidScalarPointer(v,6); 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_DEBUG) 221 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 222 #endif 223 if (im[i] >= rstart_orig && im[i] < rend_orig) { 224 row = im[i] - rstart_orig; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart_orig && in[j] < cend_orig){ 227 col = in[j] - cstart_orig; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } else if (in[j] < 0) continue; 232 #if defined(PETSC_USE_DEBUG) 233 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 234 #endif 235 else { 236 if (mat->was_assembled) { 237 if (!baij->colmap) { 238 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 239 } 240 #if defined (PETSC_USE_CTABLE) 241 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 242 col = col - 1; 243 #else 244 col = baij->colmap[in[j]/bs] - 1; 245 #endif 246 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 247 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 248 col = in[j]; 249 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 250 B = baij->B; 251 b = (Mat_SeqBAIJ*)(B)->data; 252 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 253 ba=b->a; 254 } else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 if (roworiented) { 265 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 266 } else { 267 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 268 } 269 } 270 } 271 } 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 278 { 279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscBool roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) { 320 barray[jj] = value[jj]; 321 } 322 barray += bs; 323 } 324 barray -= bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 387 { 388 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 389 PetscBool roworiented = baij->roworiented; 390 PetscErrorCode ierr; 391 PetscInt i,j,row,col; 392 PetscInt rstart_orig=mat->rmap->rstart; 393 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 394 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 395 PetscReal tmp; 396 MatScalar **HD = baij->hd,value; 397 #if defined(PETSC_USE_DEBUG) 398 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 399 #endif 400 401 PetscFunctionBegin; 402 if (v) PetscValidScalarPointer(v,6); 403 for (i=0; i<m; i++) { 404 #if defined(PETSC_USE_DEBUG) 405 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 406 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 407 #endif 408 row = im[i]; 409 if (row >= rstart_orig && row < rend_orig) { 410 for (j=0; j<n; j++) { 411 col = in[j]; 412 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 413 /* Look up PetscInto the Hash Table */ 414 key = (row/bs)*Nbs+(col/bs)+1; 415 h1 = HASH(size,key,tmp); 416 417 418 idx = h1; 419 #if defined(PETSC_USE_DEBUG) 420 insert_ct++; 421 total_ct++; 422 if (HT[idx] != key) { 423 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 424 if (idx == size) { 425 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 426 if (idx == h1) { 427 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 428 } 429 } 430 } 431 #else 432 if (HT[idx] != key) { 433 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 434 if (idx == size) { 435 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 436 if (idx == h1) { 437 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 438 } 439 } 440 } 441 #endif 442 /* A HASH table entry is found, so insert the values at the correct address */ 443 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 444 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 445 } 446 } else { 447 if (!baij->donotstash) { 448 if (roworiented) { 449 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 450 } else { 451 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 452 } 453 } 454 } 455 } 456 #if defined(PETSC_USE_DEBUG) 457 baij->ht_total_ct = total_ct; 458 baij->ht_insert_ct = insert_ct; 459 #endif 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 466 { 467 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 468 PetscBool roworiented = baij->roworiented; 469 PetscErrorCode ierr; 470 PetscInt i,j,ii,jj,row,col; 471 PetscInt rstart=baij->rstartbs; 472 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 473 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 474 PetscReal tmp; 475 MatScalar **HD = baij->hd,*baij_a; 476 const PetscScalar *v_t,*value; 477 #if defined(PETSC_USE_DEBUG) 478 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 479 #endif 480 481 PetscFunctionBegin; 482 483 if (roworiented) { 484 stepval = (n-1)*bs; 485 } else { 486 stepval = (m-1)*bs; 487 } 488 for (i=0; i<m; i++) { 489 #if defined(PETSC_USE_DEBUG) 490 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 491 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 492 #endif 493 row = im[i]; 494 v_t = v + i*nbs2; 495 if (row >= rstart && row < rend) { 496 for (j=0; j<n; j++) { 497 col = in[j]; 498 499 /* Look up into the Hash Table */ 500 key = row*Nbs+col+1; 501 h1 = HASH(size,key,tmp); 502 503 idx = h1; 504 #if defined(PETSC_USE_DEBUG) 505 total_ct++; 506 insert_ct++; 507 if (HT[idx] != key) { 508 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == size) { 510 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 511 if (idx == h1) { 512 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 513 } 514 } 515 } 516 #else 517 if (HT[idx] != key) { 518 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 519 if (idx == size) { 520 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 521 if (idx == h1) { 522 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 523 } 524 } 525 } 526 #endif 527 baij_a = HD[idx]; 528 if (roworiented) { 529 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 530 /* value = v + (i*(stepval+bs)+j)*bs; */ 531 value = v_t; 532 v_t += bs; 533 if (addv == ADD_VALUES) { 534 for (ii=0; ii<bs; ii++,value+=stepval) { 535 for (jj=ii; jj<bs2; jj+=bs) { 536 baij_a[jj] += *value++; 537 } 538 } 539 } else { 540 for (ii=0; ii<bs; ii++,value+=stepval) { 541 for (jj=ii; jj<bs2; jj+=bs) { 542 baij_a[jj] = *value++; 543 } 544 } 545 } 546 } else { 547 value = v + j*(stepval+bs)*bs + i*bs; 548 if (addv == ADD_VALUES) { 549 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 550 for (jj=0; jj<bs; jj++) { 551 baij_a[jj] += *value++; 552 } 553 } 554 } else { 555 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 556 for (jj=0; jj<bs; jj++) { 557 baij_a[jj] = *value++; 558 } 559 } 560 } 561 } 562 } 563 } else { 564 if (!baij->donotstash) { 565 if (roworiented) { 566 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } else { 568 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 569 } 570 } 571 } 572 } 573 #if defined(PETSC_USE_DEBUG) 574 baij->ht_total_ct = total_ct; 575 baij->ht_insert_ct = insert_ct; 576 #endif 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatGetValues_MPIBAIJ" 582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 583 { 584 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 585 PetscErrorCode ierr; 586 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 587 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 588 589 PetscFunctionBegin; 590 for (i=0; i<m; i++) { 591 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 592 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 593 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 594 row = idxm[i] - bsrstart; 595 for (j=0; j<n; j++) { 596 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 597 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 598 if (idxn[j] >= bscstart && idxn[j] < bscend){ 599 col = idxn[j] - bscstart; 600 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 601 } else { 602 if (!baij->colmap) { 603 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 604 } 605 #if defined (PETSC_USE_CTABLE) 606 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 607 data --; 608 #else 609 data = baij->colmap[idxn[j]/bs]-1; 610 #endif 611 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 612 else { 613 col = data + idxn[j]%bs; 614 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 615 } 616 } 617 } 618 } else { 619 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 } 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatNorm_MPIBAIJ" 627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 628 { 629 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 630 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 633 PetscReal sum = 0.0; 634 MatScalar *v; 635 636 PetscFunctionBegin; 637 if (baij->size == 1) { 638 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 639 } else { 640 if (type == NORM_FROBENIUS) { 641 v = amat->a; 642 nz = amat->nz*bs2; 643 for (i=0; i<nz; i++) { 644 #if defined(PETSC_USE_COMPLEX) 645 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 646 #else 647 sum += (*v)*(*v); v++; 648 #endif 649 } 650 v = bmat->a; 651 nz = bmat->nz*bs2; 652 for (i=0; i<nz; i++) { 653 #if defined(PETSC_USE_COMPLEX) 654 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 660 *nrm = sqrt(*nrm); 661 } else if (type == NORM_1) { /* max column sum */ 662 PetscReal *tmp,*tmp2; 663 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 664 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 665 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 666 v = amat->a; jj = amat->j; 667 for (i=0; i<amat->nz; i++) { 668 for (j=0; j<bs; j++){ 669 col = bs*(cstart + *jj) + j; /* column index */ 670 for (row=0; row<bs; row++){ 671 tmp[col] += PetscAbsScalar(*v); v++; 672 } 673 } 674 jj++; 675 } 676 v = bmat->a; jj = bmat->j; 677 for (i=0; i<bmat->nz; i++) { 678 for (j=0; j<bs; j++){ 679 col = bs*garray[*jj] + j; 680 for (row=0; row<bs; row++){ 681 tmp[col] += PetscAbsScalar(*v); v++; 682 } 683 } 684 jj++; 685 } 686 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 687 *nrm = 0.0; 688 for (j=0; j<mat->cmap->N; j++) { 689 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 690 } 691 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 692 } else if (type == NORM_INFINITY) { /* max row sum */ 693 PetscReal *sums; 694 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 695 sum = 0.0; 696 for (j=0; j<amat->mbs; j++) { 697 for (row=0; row<bs; row++) sums[row] = 0.0; 698 v = amat->a + bs2*amat->i[j]; 699 nz = amat->i[j+1]-amat->i[j]; 700 for (i=0; i<nz; i++) { 701 for (col=0; col<bs; col++){ 702 for (row=0; row<bs; row++){ 703 sums[row] += PetscAbsScalar(*v); v++; 704 } 705 } 706 } 707 v = bmat->a + bs2*bmat->i[j]; 708 nz = bmat->i[j+1]-bmat->i[j]; 709 for (i=0; i<nz; i++) { 710 for (col=0; col<bs; col++){ 711 for (row=0; row<bs; row++){ 712 sums[row] += PetscAbsScalar(*v); v++; 713 } 714 } 715 } 716 for (row=0; row<bs; row++){ 717 if (sums[row] > sum) sum = sums[row]; 718 } 719 } 720 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 721 ierr = PetscFree(sums);CHKERRQ(ierr); 722 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Creates the hash table, and sets the table 729 This table is created only once. 730 If new entried need to be added to the matrix 731 then the hash table has to be destroyed and 732 recreated. 733 */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 737 { 738 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 739 Mat A = baij->A,B=baij->B; 740 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 741 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 742 PetscErrorCode ierr; 743 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 744 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 745 PetscInt *HT,key; 746 MatScalar **HD; 747 PetscReal tmp; 748 #if defined(PETSC_USE_INFO) 749 PetscInt ct=0,max=0; 750 #endif 751 752 PetscFunctionBegin; 753 if (baij->ht) PetscFunctionReturn(0); 754 755 baij->ht_size = (PetscInt)(factor*nz); 756 ht_size = baij->ht_size; 757 758 /* Allocate Memory for Hash Table */ 759 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 760 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 761 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 762 HD = baij->hd; 763 HT = baij->ht; 764 765 /* Loop Over A */ 766 for (i=0; i<a->mbs; i++) { 767 for (j=ai[i]; j<ai[i+1]; j++) { 768 row = i+rstart; 769 col = aj[j]+cstart; 770 771 key = row*Nbs + col + 1; 772 h1 = HASH(ht_size,key,tmp); 773 for (k=0; k<ht_size; k++){ 774 if (!HT[(h1+k)%ht_size]) { 775 HT[(h1+k)%ht_size] = key; 776 HD[(h1+k)%ht_size] = a->a + j*bs2; 777 break; 778 #if defined(PETSC_USE_INFO) 779 } else { 780 ct++; 781 #endif 782 } 783 } 784 #if defined(PETSC_USE_INFO) 785 if (k> max) max = k; 786 #endif 787 } 788 } 789 /* Loop Over B */ 790 for (i=0; i<b->mbs; i++) { 791 for (j=bi[i]; j<bi[i+1]; j++) { 792 row = i+rstart; 793 col = garray[bj[j]]; 794 key = row*Nbs + col + 1; 795 h1 = HASH(ht_size,key,tmp); 796 for (k=0; k<ht_size; k++){ 797 if (!HT[(h1+k)%ht_size]) { 798 HT[(h1+k)%ht_size] = key; 799 HD[(h1+k)%ht_size] = b->a + j*bs2; 800 break; 801 #if defined(PETSC_USE_INFO) 802 } else { 803 ct++; 804 #endif 805 } 806 } 807 #if defined(PETSC_USE_INFO) 808 if (k> max) max = k; 809 #endif 810 } 811 } 812 813 /* Print Summary */ 814 #if defined(PETSC_USE_INFO) 815 for (i=0,j=0; i<ht_size; i++) { 816 if (HT[i]) {j++;} 817 } 818 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 819 #endif 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 826 { 827 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 828 PetscErrorCode ierr; 829 PetscInt nstash,reallocs; 830 InsertMode addv; 831 832 PetscFunctionBegin; 833 if (baij->donotstash || mat->nooffprocentries) { 834 PetscFunctionReturn(0); 835 } 836 837 /* make sure all processors are either in INSERTMODE or ADDMODE */ 838 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 839 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 840 mat->insertmode = addv; /* in case this processor had no cache */ 841 842 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 843 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 844 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 845 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 846 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 847 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 854 { 855 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 856 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 857 PetscErrorCode ierr; 858 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 859 PetscInt *row,*col; 860 PetscBool r1,r2,r3,other_disassembled; 861 MatScalar *val; 862 InsertMode addv = mat->insertmode; 863 PetscMPIInt n; 864 865 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 866 PetscFunctionBegin; 867 if (!baij->donotstash && !mat->nooffprocentries) { 868 while (1) { 869 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 870 if (!flg) break; 871 872 for (i=0; i<n;) { 873 /* Now identify the consecutive vals belonging to the same row */ 874 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 875 if (j < n) ncols = j-i; 876 else ncols = n-i; 877 /* Now assemble all these values with a single function call */ 878 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 879 i = j; 880 } 881 } 882 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 883 /* Now process the block-stash. Since the values are stashed column-oriented, 884 set the roworiented flag to column oriented, and after MatSetValues() 885 restore the original flags */ 886 r1 = baij->roworiented; 887 r2 = a->roworiented; 888 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 889 baij->roworiented = PETSC_FALSE; 890 a->roworiented = PETSC_FALSE; 891 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 892 while (1) { 893 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 894 if (!flg) break; 895 896 for (i=0; i<n;) { 897 /* Now identify the consecutive vals belonging to the same row */ 898 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 899 if (j < n) ncols = j-i; 900 else ncols = n-i; 901 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 902 i = j; 903 } 904 } 905 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 906 baij->roworiented = r1; 907 a->roworiented = r2; 908 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 909 } 910 911 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 912 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 913 914 /* determine if any processor has disassembled, if so we must 915 also disassemble ourselfs, in order that we may reassemble. */ 916 /* 917 if nonzero structure of submatrix B cannot change then we know that 918 no processor disassembled thus we can skip this stuff 919 */ 920 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 921 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 922 if (mat->was_assembled && !other_disassembled) { 923 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 924 } 925 } 926 927 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 928 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 929 } 930 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 931 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 933 934 #if defined(PETSC_USE_INFO) 935 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 936 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 937 baij->ht_total_ct = 0; 938 baij->ht_insert_ct = 0; 939 } 940 #endif 941 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 942 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 943 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 944 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 945 } 946 947 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 948 baij->rowvalues = 0; 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 955 { 956 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 957 PetscErrorCode ierr; 958 PetscMPIInt size = baij->size,rank = baij->rank; 959 PetscInt bs = mat->rmap->bs; 960 PetscBool iascii,isdraw; 961 PetscViewer sviewer; 962 PetscViewerFormat format; 963 964 PetscFunctionBegin; 965 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 966 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 967 if (iascii) { 968 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 969 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 970 MatInfo info; 971 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 972 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 973 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 974 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 979 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 980 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 981 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } else if (format == PETSC_VIEWER_ASCII_INFO) { 984 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 987 PetscFunctionReturn(0); 988 } 989 } 990 991 if (isdraw) { 992 PetscDraw draw; 993 PetscBool isnull; 994 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 995 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 996 } 997 998 if (size == 1) { 999 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1000 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1001 } else { 1002 /* assemble the entire matrix onto first processor. */ 1003 Mat A; 1004 Mat_SeqBAIJ *Aloc; 1005 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1006 MatScalar *a; 1007 1008 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1009 /* Perhaps this should be the type of mat? */ 1010 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1011 if (!rank) { 1012 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1013 } else { 1014 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1015 } 1016 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1017 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1018 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1019 1020 /* copy over the A part */ 1021 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1022 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1023 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1024 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = (baij->cstartbs+aj[j])*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 /* copy over the B part */ 1037 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1038 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1039 for (i=0; i<mbs; i++) { 1040 rvals[0] = bs*(baij->rstartbs + i); 1041 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1042 for (j=ai[i]; j<ai[i+1]; j++) { 1043 col = baij->garray[aj[j]]*bs; 1044 for (k=0; k<bs; k++) { 1045 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1046 col++; a += bs; 1047 } 1048 } 1049 } 1050 ierr = PetscFree(rvals);CHKERRQ(ierr); 1051 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 /* 1054 Everyone has to call to draw the matrix since the graphics waits are 1055 synchronized across all processors that share the PetscDraw object 1056 */ 1057 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1058 if (!rank) { 1059 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1060 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1061 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1062 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1063 } 1064 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1065 ierr = MatDestroy(A);CHKERRQ(ierr); 1066 } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1073 { 1074 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1075 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1076 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1077 PetscErrorCode ierr; 1078 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1079 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1080 int fd; 1081 PetscScalar *column_values; 1082 FILE *file; 1083 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1084 PetscInt message_count,flowcontrolcount; 1085 1086 PetscFunctionBegin; 1087 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1088 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1089 nz = bs2*(A->nz + B->nz); 1090 rlen = mat->rmap->n; 1091 if (!rank) { 1092 header[0] = MAT_FILE_CLASSID; 1093 header[1] = mat->rmap->N; 1094 header[2] = mat->cmap->N; 1095 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1096 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1097 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1098 /* get largest number of rows any processor has */ 1099 range = mat->rmap->range; 1100 for (i=1; i<size; i++) { 1101 rlen = PetscMax(rlen,range[i+1] - range[i]); 1102 } 1103 } else { 1104 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1105 } 1106 1107 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1108 /* compute lengths of each row */ 1109 for (i=0; i<a->mbs; i++) { 1110 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1111 } 1112 /* store the row lengths to the file */ 1113 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1114 if (!rank) { 1115 MPI_Status status; 1116 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1117 rlen = (range[1] - range[0])/bs; 1118 for (i=0; i<rlen; i++) { 1119 for (j=0; j<bs; j++) { 1120 row_lens[i*bs+j] = bs*crow_lens[i]; 1121 } 1122 } 1123 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1124 for (i=1; i<size; i++) { 1125 rlen = (range[i+1] - range[i])/bs; 1126 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1127 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1128 for (k=0; k<rlen; k++) { 1129 for (j=0; j<bs; j++) { 1130 row_lens[k*bs+j] = bs*crow_lens[k]; 1131 } 1132 } 1133 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1134 } 1135 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1136 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1137 } else { 1138 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1139 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1140 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1141 } 1142 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1143 1144 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1145 information needed to make it for each row from a block row. This does require more communication but still not more than 1146 the communication needed for the nonzero values */ 1147 nzmax = nz; /* space a largest processor needs */ 1148 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1149 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1150 cnt = 0; 1151 for (i=0; i<a->mbs; i++) { 1152 pcnt = cnt; 1153 for (j=B->i[i]; j<B->i[i+1]; j++) { 1154 if ( (col = garray[B->j[j]]) > cstart) break; 1155 for (l=0; l<bs; l++) { 1156 column_indices[cnt++] = bs*col+l; 1157 } 1158 } 1159 for (k=A->i[i]; k<A->i[i+1]; k++) { 1160 for (l=0; l<bs; l++) { 1161 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1162 } 1163 } 1164 for (; j<B->i[i+1]; j++) { 1165 for (l=0; l<bs; l++) { 1166 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1167 } 1168 } 1169 len = cnt - pcnt; 1170 for (k=1; k<bs; k++) { 1171 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1172 cnt += len; 1173 } 1174 } 1175 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1176 1177 /* store the columns to the file */ 1178 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1179 if (!rank) { 1180 MPI_Status status; 1181 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1182 for (i=1; i<size; i++) { 1183 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1184 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1185 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1186 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1187 } 1188 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1189 } else { 1190 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1191 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1192 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1193 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1194 } 1195 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1196 1197 /* load up the numerical values */ 1198 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1199 cnt = 0; 1200 for (i=0; i<a->mbs; i++) { 1201 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1202 for (j=B->i[i]; j<B->i[i+1]; j++) { 1203 if ( garray[B->j[j]] > cstart) break; 1204 for (l=0; l<bs; l++) { 1205 for (ll=0; ll<bs; ll++) { 1206 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1207 } 1208 } 1209 cnt += bs; 1210 } 1211 for (k=A->i[i]; k<A->i[i+1]; k++) { 1212 for (l=0; l<bs; l++) { 1213 for (ll=0; ll<bs; ll++) { 1214 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1215 } 1216 } 1217 cnt += bs; 1218 } 1219 for (; j<B->i[i+1]; j++) { 1220 for (l=0; l<bs; l++) { 1221 for (ll=0; ll<bs; ll++) { 1222 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1223 } 1224 } 1225 cnt += bs; 1226 } 1227 cnt += (bs-1)*rlen; 1228 } 1229 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1230 1231 /* store the column values to the file */ 1232 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1233 if (!rank) { 1234 MPI_Status status; 1235 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1236 for (i=1; i<size; i++) { 1237 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1238 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1239 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1240 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1241 } 1242 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1243 } else { 1244 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1245 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1246 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1247 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1248 } 1249 ierr = PetscFree(column_values);CHKERRQ(ierr); 1250 1251 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1252 if (file) { 1253 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatView_MPIBAIJ" 1260 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1261 { 1262 PetscErrorCode ierr; 1263 PetscBool iascii,isdraw,issocket,isbinary; 1264 1265 PetscFunctionBegin; 1266 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1267 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1268 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1269 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1270 if (iascii || isdraw || issocket) { 1271 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1272 } else if (isbinary) { 1273 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1274 } else { 1275 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1282 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1283 { 1284 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 #if defined(PETSC_USE_LOG) 1289 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1290 #endif 1291 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1292 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1293 if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);} 1294 if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);} 1295 #if defined (PETSC_USE_CTABLE) 1296 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1297 #else 1298 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1299 #endif 1300 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1301 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1302 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1303 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1304 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1305 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1306 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1307 ierr = PetscFree(baij);CHKERRQ(ierr); 1308 1309 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatMult_MPIBAIJ" 1322 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1323 { 1324 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1325 PetscErrorCode ierr; 1326 PetscInt nt; 1327 1328 PetscFunctionBegin; 1329 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1330 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1331 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1332 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1333 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1335 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1342 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1343 { 1344 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1345 PetscErrorCode ierr; 1346 1347 PetscFunctionBegin; 1348 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1349 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1350 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1351 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1357 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1358 { 1359 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1360 PetscErrorCode ierr; 1361 PetscBool merged; 1362 1363 PetscFunctionBegin; 1364 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1365 /* do nondiagonal part */ 1366 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1367 if (!merged) { 1368 /* send it on its way */ 1369 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1370 /* do local part */ 1371 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1372 /* receive remote parts: note this assumes the values are not actually */ 1373 /* inserted in yy until the next line */ 1374 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1375 } else { 1376 /* do local part */ 1377 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1378 /* send it on its way */ 1379 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1380 /* values actually were received in the Begin() but we need to call this nop */ 1381 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1382 } 1383 PetscFunctionReturn(0); 1384 } 1385 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1388 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1389 { 1390 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1391 PetscErrorCode ierr; 1392 1393 PetscFunctionBegin; 1394 /* do nondiagonal part */ 1395 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1396 /* send it on its way */ 1397 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1398 /* do local part */ 1399 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1400 /* receive remote parts: note this assumes the values are not actually */ 1401 /* inserted in yy until the next line, which is true for my implementation*/ 1402 /* but is not perhaps always true. */ 1403 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 /* 1408 This only works correctly for square matrices where the subblock A->A is the 1409 diagonal block 1410 */ 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1413 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1414 { 1415 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1420 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatScale_MPIBAIJ" 1426 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1427 { 1428 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1429 PetscErrorCode ierr; 1430 1431 PetscFunctionBegin; 1432 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1433 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1439 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1440 { 1441 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1442 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1443 PetscErrorCode ierr; 1444 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1445 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1446 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1447 1448 PetscFunctionBegin; 1449 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1450 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1451 mat->getrowactive = PETSC_TRUE; 1452 1453 if (!mat->rowvalues && (idx || v)) { 1454 /* 1455 allocate enough space to hold information from the longest row. 1456 */ 1457 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1458 PetscInt max = 1,mbs = mat->mbs,tmp; 1459 for (i=0; i<mbs; i++) { 1460 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1461 if (max < tmp) { max = tmp; } 1462 } 1463 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1464 } 1465 lrow = row - brstart; 1466 1467 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1468 if (!v) {pvA = 0; pvB = 0;} 1469 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1470 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1471 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1472 nztot = nzA + nzB; 1473 1474 cmap = mat->garray; 1475 if (v || idx) { 1476 if (nztot) { 1477 /* Sort by increasing column numbers, assuming A and B already sorted */ 1478 PetscInt imark = -1; 1479 if (v) { 1480 *v = v_p = mat->rowvalues; 1481 for (i=0; i<nzB; i++) { 1482 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1483 else break; 1484 } 1485 imark = i; 1486 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1487 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1488 } 1489 if (idx) { 1490 *idx = idx_p = mat->rowindices; 1491 if (imark > -1) { 1492 for (i=0; i<imark; i++) { 1493 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1494 } 1495 } else { 1496 for (i=0; i<nzB; i++) { 1497 if (cmap[cworkB[i]/bs] < cstart) 1498 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1499 else break; 1500 } 1501 imark = i; 1502 } 1503 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1504 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1505 } 1506 } else { 1507 if (idx) *idx = 0; 1508 if (v) *v = 0; 1509 } 1510 } 1511 *nz = nztot; 1512 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1513 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1519 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1520 { 1521 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1522 1523 PetscFunctionBegin; 1524 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1525 baij->getrowactive = PETSC_FALSE; 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1531 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1532 { 1533 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1538 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1544 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1545 { 1546 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1547 Mat A = a->A,B = a->B; 1548 PetscErrorCode ierr; 1549 PetscReal isend[5],irecv[5]; 1550 1551 PetscFunctionBegin; 1552 info->block_size = (PetscReal)matin->rmap->bs; 1553 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1554 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1555 isend[3] = info->memory; isend[4] = info->mallocs; 1556 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1557 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1558 isend[3] += info->memory; isend[4] += info->mallocs; 1559 if (flag == MAT_LOCAL) { 1560 info->nz_used = isend[0]; 1561 info->nz_allocated = isend[1]; 1562 info->nz_unneeded = isend[2]; 1563 info->memory = isend[3]; 1564 info->mallocs = isend[4]; 1565 } else if (flag == MAT_GLOBAL_MAX) { 1566 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1567 info->nz_used = irecv[0]; 1568 info->nz_allocated = irecv[1]; 1569 info->nz_unneeded = irecv[2]; 1570 info->memory = irecv[3]; 1571 info->mallocs = irecv[4]; 1572 } else if (flag == MAT_GLOBAL_SUM) { 1573 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1574 info->nz_used = irecv[0]; 1575 info->nz_allocated = irecv[1]; 1576 info->nz_unneeded = irecv[2]; 1577 info->memory = irecv[3]; 1578 info->mallocs = irecv[4]; 1579 } else { 1580 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1581 } 1582 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1583 info->fill_ratio_needed = 0; 1584 info->factor_mallocs = 0; 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1590 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1591 { 1592 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 switch (op) { 1597 case MAT_NEW_NONZERO_LOCATIONS: 1598 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1599 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1600 case MAT_KEEP_NONZERO_PATTERN: 1601 case MAT_NEW_NONZERO_LOCATION_ERR: 1602 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1603 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1604 break; 1605 case MAT_ROW_ORIENTED: 1606 a->roworiented = flg; 1607 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1608 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1609 break; 1610 case MAT_NEW_DIAGONALS: 1611 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1612 break; 1613 case MAT_IGNORE_OFF_PROC_ENTRIES: 1614 a->donotstash = flg; 1615 break; 1616 case MAT_USE_HASH_TABLE: 1617 a->ht_flag = flg; 1618 break; 1619 case MAT_SYMMETRIC: 1620 case MAT_STRUCTURALLY_SYMMETRIC: 1621 case MAT_HERMITIAN: 1622 case MAT_SYMMETRY_ETERNAL: 1623 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1624 break; 1625 default: 1626 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1627 } 1628 PetscFunctionReturn(0); 1629 } 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1633 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1634 { 1635 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1636 Mat_SeqBAIJ *Aloc; 1637 Mat B; 1638 PetscErrorCode ierr; 1639 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1640 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1641 MatScalar *a; 1642 1643 PetscFunctionBegin; 1644 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1645 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1646 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1647 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1648 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1649 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1650 } else { 1651 B = *matout; 1652 } 1653 1654 /* copy over the A part */ 1655 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1656 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1657 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1658 1659 for (i=0; i<mbs; i++) { 1660 rvals[0] = bs*(baij->rstartbs + i); 1661 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1662 for (j=ai[i]; j<ai[i+1]; j++) { 1663 col = (baij->cstartbs+aj[j])*bs; 1664 for (k=0; k<bs; k++) { 1665 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1666 col++; a += bs; 1667 } 1668 } 1669 } 1670 /* copy over the B part */ 1671 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1672 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1673 for (i=0; i<mbs; i++) { 1674 rvals[0] = bs*(baij->rstartbs + i); 1675 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1676 for (j=ai[i]; j<ai[i+1]; j++) { 1677 col = baij->garray[aj[j]]*bs; 1678 for (k=0; k<bs; k++) { 1679 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1680 col++; a += bs; 1681 } 1682 } 1683 } 1684 ierr = PetscFree(rvals);CHKERRQ(ierr); 1685 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 1688 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1689 *matout = B; 1690 } else { 1691 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1692 } 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1698 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1699 { 1700 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1701 Mat a = baij->A,b = baij->B; 1702 PetscErrorCode ierr; 1703 PetscInt s1,s2,s3; 1704 1705 PetscFunctionBegin; 1706 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1707 if (rr) { 1708 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1709 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1710 /* Overlap communication with computation. */ 1711 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1712 } 1713 if (ll) { 1714 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1715 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1716 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1717 } 1718 /* scale the diagonal block */ 1719 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1720 1721 if (rr) { 1722 /* Do a scatter end and then right scale the off-diagonal block */ 1723 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1725 } 1726 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1732 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1733 { 1734 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1735 PetscErrorCode ierr; 1736 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1737 PetscInt i,*owners = A->rmap->range; 1738 PetscInt *nprocs,j,idx,nsends,row; 1739 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1740 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1741 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1742 MPI_Comm comm = ((PetscObject)A)->comm; 1743 MPI_Request *send_waits,*recv_waits; 1744 MPI_Status recv_status,*send_status; 1745 const PetscScalar *xx; 1746 PetscScalar *bb; 1747 #if defined(PETSC_DEBUG) 1748 PetscBool found = PETSC_FALSE; 1749 #endif 1750 1751 PetscFunctionBegin; 1752 /* first count number of contributors to each processor */ 1753 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1754 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1756 j = 0; 1757 for (i=0; i<N; i++) { 1758 if (lastidx > (idx = rows[i])) j = 0; 1759 lastidx = idx; 1760 for (; j<size; j++) { 1761 if (idx >= owners[j] && idx < owners[j+1]) { 1762 nprocs[2*j]++; 1763 nprocs[2*j+1] = 1; 1764 owner[i] = j; 1765 #if defined(PETSC_DEBUG) 1766 found = PETSC_TRUE; 1767 #endif 1768 break; 1769 } 1770 } 1771 #if defined(PETSC_DEBUG) 1772 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1773 found = PETSC_FALSE; 1774 #endif 1775 } 1776 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1777 1778 if (A->nooffproczerorows) { 1779 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1780 nrecvs = nsends; 1781 nmax = N; 1782 } else { 1783 /* inform other processors of number of messages and max length*/ 1784 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1785 } 1786 1787 /* post receives: */ 1788 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1789 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1790 for (i=0; i<nrecvs; i++) { 1791 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1792 } 1793 1794 /* do sends: 1795 1) starts[i] gives the starting index in svalues for stuff going to 1796 the ith processor 1797 */ 1798 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1799 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1800 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1801 starts[0] = 0; 1802 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1803 for (i=0; i<N; i++) { 1804 svalues[starts[owner[i]]++] = rows[i]; 1805 } 1806 1807 starts[0] = 0; 1808 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1809 count = 0; 1810 for (i=0; i<size; i++) { 1811 if (nprocs[2*i+1]) { 1812 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1813 } 1814 } 1815 ierr = PetscFree(starts);CHKERRQ(ierr); 1816 1817 base = owners[rank]; 1818 1819 /* wait on receives */ 1820 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1821 count = nrecvs; 1822 slen = 0; 1823 while (count) { 1824 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1825 /* unpack receives into our local space */ 1826 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1827 source[imdex] = recv_status.MPI_SOURCE; 1828 lens[imdex] = n; 1829 slen += n; 1830 count--; 1831 } 1832 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1833 1834 /* move the data into the send scatter */ 1835 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1836 count = 0; 1837 for (i=0; i<nrecvs; i++) { 1838 values = rvalues + i*nmax; 1839 for (j=0; j<lens[i]; j++) { 1840 lrows[count++] = values[j] - base; 1841 } 1842 } 1843 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1844 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1845 ierr = PetscFree(owner);CHKERRQ(ierr); 1846 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1847 1848 /* fix right hand side if needed */ 1849 if (x && b) { 1850 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1851 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1852 for (i=0; i<slen; i++) { 1853 bb[lrows[i]] = diag*xx[lrows[i]]; 1854 } 1855 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1856 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1857 } 1858 1859 /* actually zap the local rows */ 1860 /* 1861 Zero the required rows. If the "diagonal block" of the matrix 1862 is square and the user wishes to set the diagonal we use separate 1863 code so that MatSetValues() is not called for each diagonal allocating 1864 new memory, thus calling lots of mallocs and slowing things down. 1865 1866 */ 1867 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1868 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1869 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1870 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1871 } else if (diag != 0.0) { 1872 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1873 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1874 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1875 for (i=0; i<slen; i++) { 1876 row = lrows[i] + rstart_bs; 1877 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1878 } 1879 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 } else { 1882 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1883 } 1884 1885 ierr = PetscFree(lrows);CHKERRQ(ierr); 1886 1887 /* wait on sends */ 1888 if (nsends) { 1889 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1890 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1891 ierr = PetscFree(send_status);CHKERRQ(ierr); 1892 } 1893 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1894 ierr = PetscFree(svalues);CHKERRQ(ierr); 1895 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1901 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1902 { 1903 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatEqual_MPIBAIJ" 1915 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1916 { 1917 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1918 Mat a,b,c,d; 1919 PetscBool flg; 1920 PetscErrorCode ierr; 1921 1922 PetscFunctionBegin; 1923 a = matA->A; b = matA->B; 1924 c = matB->A; d = matB->B; 1925 1926 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1927 if (flg) { 1928 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1929 } 1930 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatCopy_MPIBAIJ" 1936 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1937 { 1938 PetscErrorCode ierr; 1939 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1940 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1941 1942 PetscFunctionBegin; 1943 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1944 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1945 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1946 } else { 1947 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1948 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1949 } 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1955 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1956 { 1957 PetscErrorCode ierr; 1958 1959 PetscFunctionBegin; 1960 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1966 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1967 { 1968 PetscErrorCode ierr; 1969 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1970 PetscBLASInt bnz,one=1; 1971 Mat_SeqBAIJ *x,*y; 1972 1973 PetscFunctionBegin; 1974 if (str == SAME_NONZERO_PATTERN) { 1975 PetscScalar alpha = a; 1976 x = (Mat_SeqBAIJ *)xx->A->data; 1977 y = (Mat_SeqBAIJ *)yy->A->data; 1978 bnz = PetscBLASIntCast(x->nz); 1979 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1980 x = (Mat_SeqBAIJ *)xx->B->data; 1981 y = (Mat_SeqBAIJ *)yy->B->data; 1982 bnz = PetscBLASIntCast(x->nz); 1983 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1984 } else { 1985 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1986 } 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1992 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1993 { 1994 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1995 PetscInt rbs,cbs; 1996 PetscErrorCode ierr; 1997 1998 PetscFunctionBegin; 1999 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2000 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2001 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2002 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2003 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2004 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2010 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2011 { 2012 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2013 PetscErrorCode ierr; 2014 2015 PetscFunctionBegin; 2016 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2017 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2023 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2024 { 2025 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2030 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2036 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2037 { 2038 PetscErrorCode ierr; 2039 IS iscol_local; 2040 PetscInt csize; 2041 2042 PetscFunctionBegin; 2043 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2044 if (call == MAT_REUSE_MATRIX) { 2045 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2046 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2047 } else { 2048 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2049 } 2050 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2051 if (call == MAT_INITIAL_MATRIX) { 2052 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2053 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 2054 } 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2060 /* 2061 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2062 in local and then by concatenating the local matrices the end result. 2063 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2064 */ 2065 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2066 { 2067 PetscErrorCode ierr; 2068 PetscMPIInt rank,size; 2069 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2070 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2071 Mat *local,M,Mreuse; 2072 MatScalar *vwork,*aa; 2073 MPI_Comm comm = ((PetscObject)mat)->comm; 2074 Mat_SeqBAIJ *aij; 2075 2076 2077 PetscFunctionBegin; 2078 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2079 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2080 2081 if (call == MAT_REUSE_MATRIX) { 2082 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2083 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2084 local = &Mreuse; 2085 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2086 } else { 2087 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2088 Mreuse = *local; 2089 ierr = PetscFree(local);CHKERRQ(ierr); 2090 } 2091 2092 /* 2093 m - number of local rows 2094 n - number of columns (same on all processors) 2095 rstart - first row in new global matrix generated 2096 */ 2097 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2098 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2099 m = m/bs; 2100 n = n/bs; 2101 2102 if (call == MAT_INITIAL_MATRIX) { 2103 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2104 ii = aij->i; 2105 jj = aij->j; 2106 2107 /* 2108 Determine the number of non-zeros in the diagonal and off-diagonal 2109 portions of the matrix in order to do correct preallocation 2110 */ 2111 2112 /* first get start and end of "diagonal" columns */ 2113 if (csize == PETSC_DECIDE) { 2114 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2115 if (mglobal == n*bs) { /* square matrix */ 2116 nlocal = m; 2117 } else { 2118 nlocal = n/size + ((n % size) > rank); 2119 } 2120 } else { 2121 nlocal = csize/bs; 2122 } 2123 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2124 rstart = rend - nlocal; 2125 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2126 2127 /* next, compute all the lengths */ 2128 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2129 olens = dlens + m; 2130 for (i=0; i<m; i++) { 2131 jend = ii[i+1] - ii[i]; 2132 olen = 0; 2133 dlen = 0; 2134 for (j=0; j<jend; j++) { 2135 if (*jj < rstart || *jj >= rend) olen++; 2136 else dlen++; 2137 jj++; 2138 } 2139 olens[i] = olen; 2140 dlens[i] = dlen; 2141 } 2142 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2143 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2144 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2145 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2146 ierr = PetscFree(dlens);CHKERRQ(ierr); 2147 } else { 2148 PetscInt ml,nl; 2149 2150 M = *newmat; 2151 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2152 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2153 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2154 /* 2155 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2156 rather than the slower MatSetValues(). 2157 */ 2158 M->was_assembled = PETSC_TRUE; 2159 M->assembled = PETSC_FALSE; 2160 } 2161 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2162 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2163 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2164 ii = aij->i; 2165 jj = aij->j; 2166 aa = aij->a; 2167 for (i=0; i<m; i++) { 2168 row = rstart/bs + i; 2169 nz = ii[i+1] - ii[i]; 2170 cwork = jj; jj += nz; 2171 vwork = aa; aa += nz; 2172 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2173 } 2174 2175 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2176 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 *newmat = M; 2178 2179 /* save submatrix used in processor for next request */ 2180 if (call == MAT_INITIAL_MATRIX) { 2181 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2182 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2183 } 2184 2185 PetscFunctionReturn(0); 2186 } 2187 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatPermute_MPIBAIJ" 2190 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2191 { 2192 MPI_Comm comm,pcomm; 2193 PetscInt first,local_size,nrows; 2194 const PetscInt *rows; 2195 PetscMPIInt size; 2196 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2201 /* make a collective version of 'rowp' */ 2202 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2203 if (pcomm==comm) { 2204 crowp = rowp; 2205 } else { 2206 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2207 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2208 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2209 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2210 } 2211 /* collect the global row permutation and invert it */ 2212 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2213 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2214 if (pcomm!=comm) { 2215 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2216 } 2217 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2218 /* get the local target indices */ 2219 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2220 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2222 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2223 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2224 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2225 /* the column permutation is so much easier; 2226 make a local version of 'colp' and invert it */ 2227 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2228 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2229 if (size==1) { 2230 lcolp = colp; 2231 } else { 2232 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2233 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2234 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2235 } 2236 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2237 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2238 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2239 if (size>1) { 2240 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2241 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2242 } 2243 /* now we just get the submatrix */ 2244 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2245 /* clean up */ 2246 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2247 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2253 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2254 { 2255 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2256 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2257 2258 PetscFunctionBegin; 2259 if (nghosts) { *nghosts = B->nbs;} 2260 if (ghosts) {*ghosts = baij->garray;} 2261 PetscFunctionReturn(0); 2262 } 2263 2264 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2268 /* 2269 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2270 */ 2271 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2272 { 2273 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2274 PetscErrorCode ierr; 2275 PetscMPIInt size,*ncolsonproc,*disp,nn; 2276 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2277 const PetscInt *is; 2278 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2279 PetscInt *rowhit,M,cstart,cend,colb; 2280 PetscInt *columnsforrow,l; 2281 IS *isa; 2282 PetscBool done,flg; 2283 ISLocalToGlobalMapping map = mat->cbmapping; 2284 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2285 2286 PetscFunctionBegin; 2287 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2288 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2289 2290 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2291 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2292 M = mat->rmap->n/bs; 2293 cstart = mat->cmap->rstart/bs; 2294 cend = mat->cmap->rend/bs; 2295 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2296 c->N = mat->cmap->N/bs; 2297 c->m = mat->rmap->n/bs; 2298 c->rstart = mat->rmap->rstart/bs; 2299 2300 c->ncolors = nis; 2301 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2302 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2303 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2304 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2306 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2307 2308 /* Allow access to data structures of local part of matrix */ 2309 if (!baij->colmap) { 2310 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2311 } 2312 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2313 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2314 2315 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2316 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2317 2318 for (i=0; i<nis; i++) { 2319 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2320 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2321 c->ncolumns[i] = n; 2322 if (n) { 2323 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2324 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2325 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2326 } else { 2327 c->columns[i] = 0; 2328 } 2329 2330 if (ctype == IS_COLORING_GLOBAL){ 2331 /* Determine the total (parallel) number of columns of this color */ 2332 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2333 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2334 2335 nn = PetscMPIIntCast(n); 2336 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2337 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2338 if (!nctot) { 2339 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2340 } 2341 2342 disp[0] = 0; 2343 for (j=1; j<size; j++) { 2344 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2345 } 2346 2347 /* Get complete list of columns for color on each processor */ 2348 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2349 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2350 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2351 } else if (ctype == IS_COLORING_GHOSTED){ 2352 /* Determine local number of columns of this color on this process, including ghost points */ 2353 nctot = n; 2354 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2355 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2356 } else { 2357 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2358 } 2359 2360 /* 2361 Mark all rows affect by these columns 2362 */ 2363 /* Temporary option to allow for debugging/testing */ 2364 flg = PETSC_FALSE; 2365 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2366 if (!flg) {/*-----------------------------------------------------------------------------*/ 2367 /* crude, fast version */ 2368 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2369 /* loop over columns*/ 2370 for (j=0; j<nctot; j++) { 2371 if (ctype == IS_COLORING_GHOSTED) { 2372 col = ltog[cols[j]]; 2373 } else { 2374 col = cols[j]; 2375 } 2376 if (col >= cstart && col < cend) { 2377 /* column is in diagonal block of matrix */ 2378 rows = A_cj + A_ci[col-cstart]; 2379 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2380 } else { 2381 #if defined (PETSC_USE_CTABLE) 2382 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2383 colb --; 2384 #else 2385 colb = baij->colmap[col] - 1; 2386 #endif 2387 if (colb == -1) { 2388 m = 0; 2389 } else { 2390 colb = colb/bs; 2391 rows = B_cj + B_ci[colb]; 2392 m = B_ci[colb+1] - B_ci[colb]; 2393 } 2394 } 2395 /* loop over columns marking them in rowhit */ 2396 for (k=0; k<m; k++) { 2397 rowhit[*rows++] = col + 1; 2398 } 2399 } 2400 2401 /* count the number of hits */ 2402 nrows = 0; 2403 for (j=0; j<M; j++) { 2404 if (rowhit[j]) nrows++; 2405 } 2406 c->nrows[i] = nrows; 2407 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2408 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2409 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2410 nrows = 0; 2411 for (j=0; j<M; j++) { 2412 if (rowhit[j]) { 2413 c->rows[i][nrows] = j; 2414 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2415 nrows++; 2416 } 2417 } 2418 } else {/*-------------------------------------------------------------------------------*/ 2419 /* slow version, using rowhit as a linked list */ 2420 PetscInt currentcol,fm,mfm; 2421 rowhit[M] = M; 2422 nrows = 0; 2423 /* loop over columns*/ 2424 for (j=0; j<nctot; j++) { 2425 if (ctype == IS_COLORING_GHOSTED) { 2426 col = ltog[cols[j]]; 2427 } else { 2428 col = cols[j]; 2429 } 2430 if (col >= cstart && col < cend) { 2431 /* column is in diagonal block of matrix */ 2432 rows = A_cj + A_ci[col-cstart]; 2433 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2434 } else { 2435 #if defined (PETSC_USE_CTABLE) 2436 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2437 colb --; 2438 #else 2439 colb = baij->colmap[col] - 1; 2440 #endif 2441 if (colb == -1) { 2442 m = 0; 2443 } else { 2444 colb = colb/bs; 2445 rows = B_cj + B_ci[colb]; 2446 m = B_ci[colb+1] - B_ci[colb]; 2447 } 2448 } 2449 2450 /* loop over columns marking them in rowhit */ 2451 fm = M; /* fm points to first entry in linked list */ 2452 for (k=0; k<m; k++) { 2453 currentcol = *rows++; 2454 /* is it already in the list? */ 2455 do { 2456 mfm = fm; 2457 fm = rowhit[fm]; 2458 } while (fm < currentcol); 2459 /* not in list so add it */ 2460 if (fm != currentcol) { 2461 nrows++; 2462 columnsforrow[currentcol] = col; 2463 /* next three lines insert new entry into linked list */ 2464 rowhit[mfm] = currentcol; 2465 rowhit[currentcol] = fm; 2466 fm = currentcol; 2467 /* fm points to present position in list since we know the columns are sorted */ 2468 } else { 2469 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2470 } 2471 } 2472 } 2473 c->nrows[i] = nrows; 2474 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2475 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2476 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2477 /* now store the linked list of rows into c->rows[i] */ 2478 nrows = 0; 2479 fm = rowhit[M]; 2480 do { 2481 c->rows[i][nrows] = fm; 2482 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2483 fm = rowhit[fm]; 2484 } while (fm < M); 2485 } /* ---------------------------------------------------------------------------------------*/ 2486 ierr = PetscFree(cols);CHKERRQ(ierr); 2487 } 2488 2489 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2490 /* 2491 vscale will contain the "diagonal" on processor scalings followed by the off processor 2492 */ 2493 if (ctype == IS_COLORING_GLOBAL) { 2494 PetscInt *garray; 2495 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2496 for (i=0; i<baij->B->cmap->n/bs; i++) { 2497 for (j=0; j<bs; j++) { 2498 garray[i*bs+j] = bs*baij->garray[i]+j; 2499 } 2500 } 2501 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2502 ierr = PetscFree(garray);CHKERRQ(ierr); 2503 CHKMEMQ; 2504 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2505 for (k=0; k<c->ncolors; k++) { 2506 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2507 for (l=0; l<c->nrows[k]; l++) { 2508 col = c->columnsforrow[k][l]; 2509 if (col >= cstart && col < cend) { 2510 /* column is in diagonal block of matrix */ 2511 colb = col - cstart; 2512 } else { 2513 /* column is in "off-processor" part */ 2514 #if defined (PETSC_USE_CTABLE) 2515 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2516 colb --; 2517 #else 2518 colb = baij->colmap[col] - 1; 2519 #endif 2520 colb = colb/bs; 2521 colb += cend - cstart; 2522 } 2523 c->vscaleforrow[k][l] = colb; 2524 } 2525 } 2526 } else if (ctype == IS_COLORING_GHOSTED) { 2527 /* Get gtol mapping */ 2528 PetscInt N = mat->cmap->N, *gtol; 2529 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2530 for (i=0; i<N; i++) gtol[i] = -1; 2531 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2532 2533 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2534 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2535 for (k=0; k<c->ncolors; k++) { 2536 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2537 for (l=0; l<c->nrows[k]; l++) { 2538 col = c->columnsforrow[k][l]; /* global column index */ 2539 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2540 } 2541 } 2542 ierr = PetscFree(gtol);CHKERRQ(ierr); 2543 } 2544 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2545 2546 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2547 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2548 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2549 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2550 CHKMEMQ; 2551 PetscFunctionReturn(0); 2552 } 2553 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2556 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2557 { 2558 Mat B; 2559 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2560 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2561 Mat_SeqAIJ *b; 2562 PetscErrorCode ierr; 2563 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2564 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2565 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2566 2567 PetscFunctionBegin; 2568 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2569 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2570 2571 /* ---------------------------------------------------------------- 2572 Tell every processor the number of nonzeros per row 2573 */ 2574 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2575 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2576 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2577 } 2578 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2579 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2580 displs = recvcounts + size; 2581 for (i=0; i<size; i++) { 2582 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2583 displs[i] = A->rmap->range[i]/bs; 2584 } 2585 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2586 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2587 #else 2588 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2589 #endif 2590 /* --------------------------------------------------------------- 2591 Create the sequential matrix of the same type as the local block diagonal 2592 */ 2593 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2594 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2595 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2596 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2597 b = (Mat_SeqAIJ *)B->data; 2598 2599 /*-------------------------------------------------------------------- 2600 Copy my part of matrix column indices over 2601 */ 2602 sendcount = ad->nz + bd->nz; 2603 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2604 a_jsendbuf = ad->j; 2605 b_jsendbuf = bd->j; 2606 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2607 cnt = 0; 2608 for (i=0; i<n; i++) { 2609 2610 /* put in lower diagonal portion */ 2611 m = bd->i[i+1] - bd->i[i]; 2612 while (m > 0) { 2613 /* is it above diagonal (in bd (compressed) numbering) */ 2614 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2615 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2616 m--; 2617 } 2618 2619 /* put in diagonal portion */ 2620 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2621 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2622 } 2623 2624 /* put in upper diagonal portion */ 2625 while (m-- > 0) { 2626 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2627 } 2628 } 2629 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2630 2631 /*-------------------------------------------------------------------- 2632 Gather all column indices to all processors 2633 */ 2634 for (i=0; i<size; i++) { 2635 recvcounts[i] = 0; 2636 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2637 recvcounts[i] += lens[j]; 2638 } 2639 } 2640 displs[0] = 0; 2641 for (i=1; i<size; i++) { 2642 displs[i] = displs[i-1] + recvcounts[i-1]; 2643 } 2644 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2645 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2646 #else 2647 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2648 #endif 2649 /*-------------------------------------------------------------------- 2650 Assemble the matrix into useable form (note numerical values not yet set) 2651 */ 2652 /* set the b->ilen (length of each row) values */ 2653 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2654 /* set the b->i indices */ 2655 b->i[0] = 0; 2656 for (i=1; i<=A->rmap->N/bs; i++) { 2657 b->i[i] = b->i[i-1] + lens[i-1]; 2658 } 2659 ierr = PetscFree(lens);CHKERRQ(ierr); 2660 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2661 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2662 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2663 2664 if (A->symmetric){ 2665 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2666 } else if (A->hermitian) { 2667 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2668 } else if (A->structurally_symmetric) { 2669 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2670 } 2671 *newmat = B; 2672 PetscFunctionReturn(0); 2673 } 2674 2675 #undef __FUNCT__ 2676 #define __FUNCT__ "MatSOR_MPIBAIJ" 2677 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2678 { 2679 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2680 PetscErrorCode ierr; 2681 Vec bb1 = 0; 2682 2683 PetscFunctionBegin; 2684 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2685 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2686 } 2687 2688 if (flag == SOR_APPLY_UPPER) { 2689 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2694 if (flag & SOR_ZERO_INITIAL_GUESS) { 2695 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2696 its--; 2697 } 2698 2699 while (its--) { 2700 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2701 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2702 2703 /* update rhs: bb1 = bb - B*x */ 2704 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2705 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2706 2707 /* local sweep */ 2708 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2709 } 2710 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2711 if (flag & SOR_ZERO_INITIAL_GUESS) { 2712 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2713 its--; 2714 } 2715 while (its--) { 2716 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 2719 /* update rhs: bb1 = bb - B*x */ 2720 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2721 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2722 2723 /* local sweep */ 2724 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2725 } 2726 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2727 if (flag & SOR_ZERO_INITIAL_GUESS) { 2728 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2729 its--; 2730 } 2731 while (its--) { 2732 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2733 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2734 2735 /* update rhs: bb1 = bb - B*x */ 2736 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2737 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2738 2739 /* local sweep */ 2740 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2741 } 2742 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2743 2744 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2745 PetscFunctionReturn(0); 2746 } 2747 2748 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2749 2750 2751 /* -------------------------------------------------------------------*/ 2752 static struct _MatOps MatOps_Values = { 2753 MatSetValues_MPIBAIJ, 2754 MatGetRow_MPIBAIJ, 2755 MatRestoreRow_MPIBAIJ, 2756 MatMult_MPIBAIJ, 2757 /* 4*/ MatMultAdd_MPIBAIJ, 2758 MatMultTranspose_MPIBAIJ, 2759 MatMultTransposeAdd_MPIBAIJ, 2760 0, 2761 0, 2762 0, 2763 /*10*/ 0, 2764 0, 2765 0, 2766 MatSOR_MPIBAIJ, 2767 MatTranspose_MPIBAIJ, 2768 /*15*/ MatGetInfo_MPIBAIJ, 2769 MatEqual_MPIBAIJ, 2770 MatGetDiagonal_MPIBAIJ, 2771 MatDiagonalScale_MPIBAIJ, 2772 MatNorm_MPIBAIJ, 2773 /*20*/ MatAssemblyBegin_MPIBAIJ, 2774 MatAssemblyEnd_MPIBAIJ, 2775 MatSetOption_MPIBAIJ, 2776 MatZeroEntries_MPIBAIJ, 2777 /*24*/ MatZeroRows_MPIBAIJ, 2778 0, 2779 0, 2780 0, 2781 0, 2782 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2783 0, 2784 0, 2785 0, 2786 0, 2787 /*34*/ MatDuplicate_MPIBAIJ, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*39*/ MatAXPY_MPIBAIJ, 2793 MatGetSubMatrices_MPIBAIJ, 2794 MatIncreaseOverlap_MPIBAIJ, 2795 MatGetValues_MPIBAIJ, 2796 MatCopy_MPIBAIJ, 2797 /*44*/ 0, 2798 MatScale_MPIBAIJ, 2799 0, 2800 0, 2801 0, 2802 /*49*/ MatSetBlockSize_MPIBAIJ, 2803 0, 2804 0, 2805 0, 2806 0, 2807 /*54*/ MatFDColoringCreate_MPIBAIJ, 2808 0, 2809 MatSetUnfactored_MPIBAIJ, 2810 MatPermute_MPIBAIJ, 2811 MatSetValuesBlocked_MPIBAIJ, 2812 /*59*/ MatGetSubMatrix_MPIBAIJ, 2813 MatDestroy_MPIBAIJ, 2814 MatView_MPIBAIJ, 2815 0, 2816 0, 2817 /*64*/ 0, 2818 0, 2819 0, 2820 0, 2821 0, 2822 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2823 0, 2824 0, 2825 0, 2826 0, 2827 /*74*/ 0, 2828 MatFDColoringApply_BAIJ, 2829 0, 2830 0, 2831 0, 2832 /*79*/ 0, 2833 0, 2834 0, 2835 0, 2836 MatLoad_MPIBAIJ, 2837 /*84*/ 0, 2838 0, 2839 0, 2840 0, 2841 0, 2842 /*89*/ 0, 2843 0, 2844 0, 2845 0, 2846 0, 2847 /*94*/ 0, 2848 0, 2849 0, 2850 0, 2851 0, 2852 /*99*/ 0, 2853 0, 2854 0, 2855 0, 2856 0, 2857 /*104*/0, 2858 MatRealPart_MPIBAIJ, 2859 MatImaginaryPart_MPIBAIJ, 2860 0, 2861 0, 2862 /*109*/0, 2863 0, 2864 0, 2865 0, 2866 0, 2867 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2868 0, 2869 MatGetGhosts_MPIBAIJ, 2870 0, 2871 0, 2872 /*119*/0, 2873 0, 2874 0, 2875 0 2876 }; 2877 2878 EXTERN_C_BEGIN 2879 #undef __FUNCT__ 2880 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2881 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2882 { 2883 PetscFunctionBegin; 2884 *a = ((Mat_MPIBAIJ *)A->data)->A; 2885 *iscopy = PETSC_FALSE; 2886 PetscFunctionReturn(0); 2887 } 2888 EXTERN_C_END 2889 2890 EXTERN_C_BEGIN 2891 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2892 EXTERN_C_END 2893 2894 EXTERN_C_BEGIN 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2897 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2898 { 2899 PetscInt m,rstart,cstart,cend; 2900 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2901 const PetscInt *JJ=0; 2902 PetscScalar *values=0; 2903 PetscErrorCode ierr; 2904 2905 PetscFunctionBegin; 2906 2907 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2908 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2909 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2910 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2911 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2912 m = B->rmap->n/bs; 2913 rstart = B->rmap->rstart/bs; 2914 cstart = B->cmap->rstart/bs; 2915 cend = B->cmap->rend/bs; 2916 2917 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2918 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2919 for (i=0; i<m; i++) { 2920 nz = ii[i+1] - ii[i]; 2921 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2922 nz_max = PetscMax(nz_max,nz); 2923 JJ = jj + ii[i]; 2924 for (j=0; j<nz; j++) { 2925 if (*JJ >= cstart) break; 2926 JJ++; 2927 } 2928 d = 0; 2929 for (; j<nz; j++) { 2930 if (*JJ++ >= cend) break; 2931 d++; 2932 } 2933 d_nnz[i] = d; 2934 o_nnz[i] = nz - d; 2935 } 2936 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2937 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2938 2939 values = (PetscScalar*)V; 2940 if (!values) { 2941 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2942 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2943 } 2944 for (i=0; i<m; i++) { 2945 PetscInt row = i + rstart; 2946 PetscInt ncols = ii[i+1] - ii[i]; 2947 const PetscInt *icols = jj + ii[i]; 2948 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2949 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2950 } 2951 2952 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2955 2956 PetscFunctionReturn(0); 2957 } 2958 EXTERN_C_END 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2962 /*@C 2963 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2964 (the default parallel PETSc format). 2965 2966 Collective on MPI_Comm 2967 2968 Input Parameters: 2969 + A - the matrix 2970 . bs - the block size 2971 . i - the indices into j for the start of each local row (starts with zero) 2972 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2973 - v - optional values in the matrix 2974 2975 Level: developer 2976 2977 .keywords: matrix, aij, compressed row, sparse, parallel 2978 2979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2980 @*/ 2981 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2982 { 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 EXTERN_C_BEGIN 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2993 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2994 { 2995 Mat_MPIBAIJ *b; 2996 PetscErrorCode ierr; 2997 PetscInt i, newbs = PetscAbs(bs); 2998 2999 PetscFunctionBegin; 3000 if (bs < 0) { 3001 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3002 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3003 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3004 bs = PetscAbs(bs); 3005 } 3006 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3007 bs = newbs; 3008 3009 3010 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3011 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3012 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3013 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3014 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3015 3016 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3017 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3018 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3019 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3020 3021 if (d_nnz) { 3022 for (i=0; i<B->rmap->n/bs; i++) { 3023 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3024 } 3025 } 3026 if (o_nnz) { 3027 for (i=0; i<B->rmap->n/bs; i++) { 3028 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3029 } 3030 } 3031 3032 b = (Mat_MPIBAIJ*)B->data; 3033 b->bs2 = bs*bs; 3034 b->mbs = B->rmap->n/bs; 3035 b->nbs = B->cmap->n/bs; 3036 b->Mbs = B->rmap->N/bs; 3037 b->Nbs = B->cmap->N/bs; 3038 3039 for (i=0; i<=b->size; i++) { 3040 b->rangebs[i] = B->rmap->range[i]/bs; 3041 } 3042 b->rstartbs = B->rmap->rstart/bs; 3043 b->rendbs = B->rmap->rend/bs; 3044 b->cstartbs = B->cmap->rstart/bs; 3045 b->cendbs = B->cmap->rend/bs; 3046 3047 if (!B->preallocated) { 3048 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3049 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3050 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3051 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3052 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3053 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3054 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3055 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3056 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3057 } 3058 3059 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3060 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3061 B->preallocated = PETSC_TRUE; 3062 PetscFunctionReturn(0); 3063 } 3064 EXTERN_C_END 3065 3066 EXTERN_C_BEGIN 3067 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3068 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3069 EXTERN_C_END 3070 3071 3072 EXTERN_C_BEGIN 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3075 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3076 { 3077 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3078 PetscErrorCode ierr; 3079 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3080 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3081 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3082 3083 PetscFunctionBegin; 3084 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3085 ii[0] = 0; 3086 CHKMEMQ; 3087 for (i=0; i<M; i++) { 3088 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3089 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3090 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3091 /* remove one from count of matrix has diagonal */ 3092 for (j=id[i]; j<id[i+1]; j++) { 3093 if (jd[j] == i) {ii[i+1]--;break;} 3094 } 3095 CHKMEMQ; 3096 } 3097 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3098 cnt = 0; 3099 for (i=0; i<M; i++) { 3100 for (j=io[i]; j<io[i+1]; j++) { 3101 if (garray[jo[j]] > rstart) break; 3102 jj[cnt++] = garray[jo[j]]; 3103 CHKMEMQ; 3104 } 3105 for (k=id[i]; k<id[i+1]; k++) { 3106 if (jd[k] != i) { 3107 jj[cnt++] = rstart + jd[k]; 3108 CHKMEMQ; 3109 } 3110 } 3111 for (;j<io[i+1]; j++) { 3112 jj[cnt++] = garray[jo[j]]; 3113 CHKMEMQ; 3114 } 3115 } 3116 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3117 PetscFunctionReturn(0); 3118 } 3119 EXTERN_C_END 3120 3121 #include "../src/mat/impls/aij/mpi/mpiaij.h" 3122 EXTERN_C_BEGIN 3123 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3124 EXTERN_C_END 3125 3126 EXTERN_C_BEGIN 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3129 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3130 { 3131 PetscErrorCode ierr; 3132 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3133 Mat B; 3134 Mat_MPIAIJ *b; 3135 3136 PetscFunctionBegin; 3137 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3138 3139 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3140 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3141 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3142 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3143 b = (Mat_MPIAIJ*) B->data; 3144 3145 ierr = MatDestroy(b->A);CHKERRQ(ierr); 3146 ierr = MatDestroy(b->B);CHKERRQ(ierr); 3147 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3148 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3149 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3150 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3152 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 if (reuse == MAT_REUSE_MATRIX) { 3155 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3156 } else { 3157 *newmat = B; 3158 } 3159 PetscFunctionReturn(0); 3160 } 3161 EXTERN_C_END 3162 3163 EXTERN_C_BEGIN 3164 #if defined(PETSC_HAVE_MUMPS) 3165 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3166 #endif 3167 EXTERN_C_END 3168 3169 /*MC 3170 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3171 3172 Options Database Keys: 3173 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3174 . -mat_block_size <bs> - set the blocksize used to store the matrix 3175 - -mat_use_hash_table <fact> 3176 3177 Level: beginner 3178 3179 .seealso: MatCreateMPIBAIJ 3180 M*/ 3181 3182 EXTERN_C_BEGIN 3183 #undef __FUNCT__ 3184 #define __FUNCT__ "MatCreate_MPIBAIJ" 3185 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3186 { 3187 Mat_MPIBAIJ *b; 3188 PetscErrorCode ierr; 3189 PetscBool flg; 3190 3191 PetscFunctionBegin; 3192 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3193 B->data = (void*)b; 3194 3195 3196 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3197 B->assembled = PETSC_FALSE; 3198 3199 B->insertmode = NOT_SET_VALUES; 3200 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3201 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3202 3203 /* build local table of row and column ownerships */ 3204 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3205 3206 /* build cache for off array entries formed */ 3207 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3208 b->donotstash = PETSC_FALSE; 3209 b->colmap = PETSC_NULL; 3210 b->garray = PETSC_NULL; 3211 b->roworiented = PETSC_TRUE; 3212 3213 /* stuff used in block assembly */ 3214 b->barray = 0; 3215 3216 /* stuff used for matrix vector multiply */ 3217 b->lvec = 0; 3218 b->Mvctx = 0; 3219 3220 /* stuff for MatGetRow() */ 3221 b->rowindices = 0; 3222 b->rowvalues = 0; 3223 b->getrowactive = PETSC_FALSE; 3224 3225 /* hash table stuff */ 3226 b->ht = 0; 3227 b->hd = 0; 3228 b->ht_size = 0; 3229 b->ht_flag = PETSC_FALSE; 3230 b->ht_fact = 0; 3231 b->ht_total_ct = 0; 3232 b->ht_insert_ct = 0; 3233 3234 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3235 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3236 if (flg) { 3237 PetscReal fact = 1.39; 3238 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3239 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3240 if (fact <= 1.0) fact = 1.39; 3241 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3242 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3243 } 3244 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3245 3246 #if defined(PETSC_HAVE_MUMPS) 3247 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3248 #endif 3249 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3250 "MatConvert_MPIBAIJ_MPIAdj", 3251 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3252 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3253 "MatConvert_MPIBAIJ_MPIAIJ", 3254 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3255 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3256 "MatStoreValues_MPIBAIJ", 3257 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3258 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3259 "MatRetrieveValues_MPIBAIJ", 3260 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3261 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3262 "MatGetDiagonalBlock_MPIBAIJ", 3263 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3264 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3265 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3266 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3267 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3268 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3269 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3270 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3271 "MatDiagonalScaleLocal_MPIBAIJ", 3272 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3273 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3274 "MatSetHashTableFactor_MPIBAIJ", 3275 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3276 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3277 PetscFunctionReturn(0); 3278 } 3279 EXTERN_C_END 3280 3281 /*MC 3282 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3283 3284 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3285 and MATMPIBAIJ otherwise. 3286 3287 Options Database Keys: 3288 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3289 3290 Level: beginner 3291 3292 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3293 M*/ 3294 3295 EXTERN_C_BEGIN 3296 #undef __FUNCT__ 3297 #define __FUNCT__ "MatCreate_BAIJ" 3298 PetscErrorCode MatCreate_BAIJ(Mat A) 3299 { 3300 PetscErrorCode ierr; 3301 PetscMPIInt size; 3302 3303 PetscFunctionBegin; 3304 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3305 if (size == 1) { 3306 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3307 } else { 3308 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3309 } 3310 PetscFunctionReturn(0); 3311 } 3312 EXTERN_C_END 3313 3314 #undef __FUNCT__ 3315 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3316 /*@C 3317 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3318 (block compressed row). For good matrix assembly performance 3319 the user should preallocate the matrix storage by setting the parameters 3320 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3321 performance can be increased by more than a factor of 50. 3322 3323 Collective on Mat 3324 3325 Input Parameters: 3326 + A - the matrix 3327 . bs - size of blockk 3328 . d_nz - number of block nonzeros per block row in diagonal portion of local 3329 submatrix (same for all local rows) 3330 . d_nnz - array containing the number of block nonzeros in the various block rows 3331 of the in diagonal portion of the local (possibly different for each block 3332 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3333 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3334 submatrix (same for all local rows). 3335 - o_nnz - array containing the number of nonzeros in the various block rows of the 3336 off-diagonal portion of the local submatrix (possibly different for 3337 each block row) or PETSC_NULL. 3338 3339 If the *_nnz parameter is given then the *_nz parameter is ignored 3340 3341 Options Database Keys: 3342 + -mat_block_size - size of the blocks to use 3343 - -mat_use_hash_table <fact> 3344 3345 Notes: 3346 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3347 than it must be used on all processors that share the object for that argument. 3348 3349 Storage Information: 3350 For a square global matrix we define each processor's diagonal portion 3351 to be its local rows and the corresponding columns (a square submatrix); 3352 each processor's off-diagonal portion encompasses the remainder of the 3353 local matrix (a rectangular submatrix). 3354 3355 The user can specify preallocated storage for the diagonal part of 3356 the local submatrix with either d_nz or d_nnz (not both). Set 3357 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3358 memory allocation. Likewise, specify preallocated storage for the 3359 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3360 3361 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3362 the figure below we depict these three local rows and all columns (0-11). 3363 3364 .vb 3365 0 1 2 3 4 5 6 7 8 9 10 11 3366 ------------------- 3367 row 3 | o o o d d d o o o o o o 3368 row 4 | o o o d d d o o o o o o 3369 row 5 | o o o d d d o o o o o o 3370 ------------------- 3371 .ve 3372 3373 Thus, any entries in the d locations are stored in the d (diagonal) 3374 submatrix, and any entries in the o locations are stored in the 3375 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3376 stored simply in the MATSEQBAIJ format for compressed row storage. 3377 3378 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3379 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3380 In general, for PDE problems in which most nonzeros are near the diagonal, 3381 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3382 or you will get TERRIBLE performance; see the users' manual chapter on 3383 matrices. 3384 3385 You can call MatGetInfo() to get information on how effective the preallocation was; 3386 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3387 You can also run with the option -info and look for messages with the string 3388 malloc in them to see if additional memory allocation was needed. 3389 3390 Level: intermediate 3391 3392 .keywords: matrix, block, aij, compressed row, sparse, parallel 3393 3394 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3395 @*/ 3396 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3397 { 3398 PetscErrorCode ierr; 3399 3400 PetscFunctionBegin; 3401 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3402 PetscFunctionReturn(0); 3403 } 3404 3405 #undef __FUNCT__ 3406 #define __FUNCT__ "MatCreateMPIBAIJ" 3407 /*@C 3408 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3409 (block compressed row). For good matrix assembly performance 3410 the user should preallocate the matrix storage by setting the parameters 3411 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3412 performance can be increased by more than a factor of 50. 3413 3414 Collective on MPI_Comm 3415 3416 Input Parameters: 3417 + comm - MPI communicator 3418 . bs - size of blockk 3419 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3420 This value should be the same as the local size used in creating the 3421 y vector for the matrix-vector product y = Ax. 3422 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3423 This value should be the same as the local size used in creating the 3424 x vector for the matrix-vector product y = Ax. 3425 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3426 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3427 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3428 submatrix (same for all local rows) 3429 . d_nnz - array containing the number of nonzero blocks in the various block rows 3430 of the in diagonal portion of the local (possibly different for each block 3431 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3432 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3433 submatrix (same for all local rows). 3434 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3435 off-diagonal portion of the local submatrix (possibly different for 3436 each block row) or PETSC_NULL. 3437 3438 Output Parameter: 3439 . A - the matrix 3440 3441 Options Database Keys: 3442 + -mat_block_size - size of the blocks to use 3443 - -mat_use_hash_table <fact> 3444 3445 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3446 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3447 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3448 3449 Notes: 3450 If the *_nnz parameter is given then the *_nz parameter is ignored 3451 3452 A nonzero block is any block that as 1 or more nonzeros in it 3453 3454 The user MUST specify either the local or global matrix dimensions 3455 (possibly both). 3456 3457 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3458 than it must be used on all processors that share the object for that argument. 3459 3460 Storage Information: 3461 For a square global matrix we define each processor's diagonal portion 3462 to be its local rows and the corresponding columns (a square submatrix); 3463 each processor's off-diagonal portion encompasses the remainder of the 3464 local matrix (a rectangular submatrix). 3465 3466 The user can specify preallocated storage for the diagonal part of 3467 the local submatrix with either d_nz or d_nnz (not both). Set 3468 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3469 memory allocation. Likewise, specify preallocated storage for the 3470 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3471 3472 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3473 the figure below we depict these three local rows and all columns (0-11). 3474 3475 .vb 3476 0 1 2 3 4 5 6 7 8 9 10 11 3477 ------------------- 3478 row 3 | o o o d d d o o o o o o 3479 row 4 | o o o d d d o o o o o o 3480 row 5 | o o o d d d o o o o o o 3481 ------------------- 3482 .ve 3483 3484 Thus, any entries in the d locations are stored in the d (diagonal) 3485 submatrix, and any entries in the o locations are stored in the 3486 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3487 stored simply in the MATSEQBAIJ format for compressed row storage. 3488 3489 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3490 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3491 In general, for PDE problems in which most nonzeros are near the diagonal, 3492 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3493 or you will get TERRIBLE performance; see the users' manual chapter on 3494 matrices. 3495 3496 Level: intermediate 3497 3498 .keywords: matrix, block, aij, compressed row, sparse, parallel 3499 3500 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3501 @*/ 3502 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3503 { 3504 PetscErrorCode ierr; 3505 PetscMPIInt size; 3506 3507 PetscFunctionBegin; 3508 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3509 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3510 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3511 if (size > 1) { 3512 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3513 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3514 } else { 3515 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3516 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3517 } 3518 PetscFunctionReturn(0); 3519 } 3520 3521 #undef __FUNCT__ 3522 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3523 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3524 { 3525 Mat mat; 3526 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3527 PetscErrorCode ierr; 3528 PetscInt len=0; 3529 3530 PetscFunctionBegin; 3531 *newmat = 0; 3532 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3533 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3534 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3535 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3536 3537 mat->factortype = matin->factortype; 3538 mat->preallocated = PETSC_TRUE; 3539 mat->assembled = PETSC_TRUE; 3540 mat->insertmode = NOT_SET_VALUES; 3541 3542 a = (Mat_MPIBAIJ*)mat->data; 3543 mat->rmap->bs = matin->rmap->bs; 3544 a->bs2 = oldmat->bs2; 3545 a->mbs = oldmat->mbs; 3546 a->nbs = oldmat->nbs; 3547 a->Mbs = oldmat->Mbs; 3548 a->Nbs = oldmat->Nbs; 3549 3550 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3551 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3552 3553 a->size = oldmat->size; 3554 a->rank = oldmat->rank; 3555 a->donotstash = oldmat->donotstash; 3556 a->roworiented = oldmat->roworiented; 3557 a->rowindices = 0; 3558 a->rowvalues = 0; 3559 a->getrowactive = PETSC_FALSE; 3560 a->barray = 0; 3561 a->rstartbs = oldmat->rstartbs; 3562 a->rendbs = oldmat->rendbs; 3563 a->cstartbs = oldmat->cstartbs; 3564 a->cendbs = oldmat->cendbs; 3565 3566 /* hash table stuff */ 3567 a->ht = 0; 3568 a->hd = 0; 3569 a->ht_size = 0; 3570 a->ht_flag = oldmat->ht_flag; 3571 a->ht_fact = oldmat->ht_fact; 3572 a->ht_total_ct = 0; 3573 a->ht_insert_ct = 0; 3574 3575 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3576 if (oldmat->colmap) { 3577 #if defined (PETSC_USE_CTABLE) 3578 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3579 #else 3580 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3581 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3582 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3583 #endif 3584 } else a->colmap = 0; 3585 3586 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3587 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3588 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3589 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3590 } else a->garray = 0; 3591 3592 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3593 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3594 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3595 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3596 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3597 3598 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3599 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3600 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3601 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3602 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3603 *newmat = mat; 3604 3605 PetscFunctionReturn(0); 3606 } 3607 3608 #undef __FUNCT__ 3609 #define __FUNCT__ "MatLoad_MPIBAIJ" 3610 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3611 { 3612 PetscErrorCode ierr; 3613 int fd; 3614 PetscInt i,nz,j,rstart,rend; 3615 PetscScalar *vals,*buf; 3616 MPI_Comm comm = ((PetscObject)viewer)->comm; 3617 MPI_Status status; 3618 PetscMPIInt rank,size,maxnz; 3619 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3620 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3621 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3622 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3623 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3624 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3625 3626 PetscFunctionBegin; 3627 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3628 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3629 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3630 3631 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3632 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3633 if (!rank) { 3634 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3635 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3636 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3637 } 3638 3639 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3640 3641 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3642 M = header[1]; N = header[2]; 3643 3644 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3645 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3646 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3647 3648 /* If global sizes are set, check if they are consistent with that given in the file */ 3649 if (sizesset) { 3650 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3651 } 3652 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3653 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3654 3655 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3656 3657 /* 3658 This code adds extra rows to make sure the number of rows is 3659 divisible by the blocksize 3660 */ 3661 Mbs = M/bs; 3662 extra_rows = bs - M + bs*Mbs; 3663 if (extra_rows == bs) extra_rows = 0; 3664 else Mbs++; 3665 if (extra_rows && !rank) { 3666 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3667 } 3668 3669 /* determine ownership of all rows */ 3670 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3671 mbs = Mbs/size + ((Mbs % size) > rank); 3672 m = mbs*bs; 3673 } else { /* User set */ 3674 m = newmat->rmap->n; 3675 mbs = m/bs; 3676 } 3677 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3678 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3679 3680 /* process 0 needs enough room for process with most rows */ 3681 if (!rank) { 3682 mmax = rowners[1]; 3683 for (i=2; i<size; i++) { 3684 mmax = PetscMax(mmax,rowners[i]); 3685 } 3686 mmax*=bs; 3687 } else mmax = m; 3688 3689 rowners[0] = 0; 3690 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3691 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3692 rstart = rowners[rank]; 3693 rend = rowners[rank+1]; 3694 3695 /* distribute row lengths to all processors */ 3696 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3697 if (!rank) { 3698 mend = m; 3699 if (size == 1) mend = mend - extra_rows; 3700 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3701 for (j=mend; j<m; j++) locrowlens[j] = 1; 3702 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3703 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3704 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3705 for (j=0; j<m; j++) { 3706 procsnz[0] += locrowlens[j]; 3707 } 3708 for (i=1; i<size; i++) { 3709 mend = browners[i+1] - browners[i]; 3710 if (i == size-1) mend = mend - extra_rows; 3711 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3712 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3713 /* calculate the number of nonzeros on each processor */ 3714 for (j=0; j<browners[i+1]-browners[i]; j++) { 3715 procsnz[i] += rowlengths[j]; 3716 } 3717 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3718 } 3719 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3720 } else { 3721 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3722 } 3723 3724 if (!rank) { 3725 /* determine max buffer needed and allocate it */ 3726 maxnz = procsnz[0]; 3727 for (i=1; i<size; i++) { 3728 maxnz = PetscMax(maxnz,procsnz[i]); 3729 } 3730 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3731 3732 /* read in my part of the matrix column indices */ 3733 nz = procsnz[0]; 3734 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3735 mycols = ibuf; 3736 if (size == 1) nz -= extra_rows; 3737 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3738 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3739 3740 /* read in every ones (except the last) and ship off */ 3741 for (i=1; i<size-1; i++) { 3742 nz = procsnz[i]; 3743 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3744 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3745 } 3746 /* read in the stuff for the last proc */ 3747 if (size != 1) { 3748 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3749 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3750 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3751 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3752 } 3753 ierr = PetscFree(cols);CHKERRQ(ierr); 3754 } else { 3755 /* determine buffer space needed for message */ 3756 nz = 0; 3757 for (i=0; i<m; i++) { 3758 nz += locrowlens[i]; 3759 } 3760 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3761 mycols = ibuf; 3762 /* receive message of column indices*/ 3763 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3764 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3765 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3766 } 3767 3768 /* loop over local rows, determining number of off diagonal entries */ 3769 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3770 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3771 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3772 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3773 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3774 rowcount = 0; nzcount = 0; 3775 for (i=0; i<mbs; i++) { 3776 dcount = 0; 3777 odcount = 0; 3778 for (j=0; j<bs; j++) { 3779 kmax = locrowlens[rowcount]; 3780 for (k=0; k<kmax; k++) { 3781 tmp = mycols[nzcount++]/bs; 3782 if (!mask[tmp]) { 3783 mask[tmp] = 1; 3784 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3785 else masked1[dcount++] = tmp; 3786 } 3787 } 3788 rowcount++; 3789 } 3790 3791 dlens[i] = dcount; 3792 odlens[i] = odcount; 3793 3794 /* zero out the mask elements we set */ 3795 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3796 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3797 } 3798 3799 3800 if (!sizesset) { 3801 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3802 } 3803 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3804 3805 if (!rank) { 3806 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3807 /* read in my part of the matrix numerical values */ 3808 nz = procsnz[0]; 3809 vals = buf; 3810 mycols = ibuf; 3811 if (size == 1) nz -= extra_rows; 3812 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3813 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3814 3815 /* insert into matrix */ 3816 jj = rstart*bs; 3817 for (i=0; i<m; i++) { 3818 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3819 mycols += locrowlens[i]; 3820 vals += locrowlens[i]; 3821 jj++; 3822 } 3823 /* read in other processors (except the last one) and ship out */ 3824 for (i=1; i<size-1; i++) { 3825 nz = procsnz[i]; 3826 vals = buf; 3827 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3828 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3829 } 3830 /* the last proc */ 3831 if (size != 1){ 3832 nz = procsnz[i] - extra_rows; 3833 vals = buf; 3834 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3835 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3836 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3837 } 3838 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3839 } else { 3840 /* receive numeric values */ 3841 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3842 3843 /* receive message of values*/ 3844 vals = buf; 3845 mycols = ibuf; 3846 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3847 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3848 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3849 3850 /* insert into matrix */ 3851 jj = rstart*bs; 3852 for (i=0; i<m; i++) { 3853 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3854 mycols += locrowlens[i]; 3855 vals += locrowlens[i]; 3856 jj++; 3857 } 3858 } 3859 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3860 ierr = PetscFree(buf);CHKERRQ(ierr); 3861 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3862 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3863 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3864 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3865 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3867 3868 PetscFunctionReturn(0); 3869 } 3870 3871 #undef __FUNCT__ 3872 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3873 /*@ 3874 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3875 3876 Input Parameters: 3877 . mat - the matrix 3878 . fact - factor 3879 3880 Not Collective, each process can use a different factor 3881 3882 Level: advanced 3883 3884 Notes: 3885 This can also be set by the command line option: -mat_use_hash_table <fact> 3886 3887 .keywords: matrix, hashtable, factor, HT 3888 3889 .seealso: MatSetOption() 3890 @*/ 3891 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3892 { 3893 PetscErrorCode ierr; 3894 3895 PetscFunctionBegin; 3896 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3897 PetscFunctionReturn(0); 3898 } 3899 3900 EXTERN_C_BEGIN 3901 #undef __FUNCT__ 3902 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3903 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3904 { 3905 Mat_MPIBAIJ *baij; 3906 3907 PetscFunctionBegin; 3908 baij = (Mat_MPIBAIJ*)mat->data; 3909 baij->ht_fact = fact; 3910 PetscFunctionReturn(0); 3911 } 3912 EXTERN_C_END 3913 3914 #undef __FUNCT__ 3915 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3916 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3917 { 3918 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3919 PetscFunctionBegin; 3920 *Ad = a->A; 3921 *Ao = a->B; 3922 *colmap = a->garray; 3923 PetscFunctionReturn(0); 3924 } 3925 3926 /* 3927 Special version for direct calls from Fortran (to eliminate two function call overheads 3928 */ 3929 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3930 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3931 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3932 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3933 #endif 3934 3935 #undef __FUNCT__ 3936 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3937 /*@C 3938 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3939 3940 Collective on Mat 3941 3942 Input Parameters: 3943 + mat - the matrix 3944 . min - number of input rows 3945 . im - input rows 3946 . nin - number of input columns 3947 . in - input columns 3948 . v - numerical values input 3949 - addvin - INSERT_VALUES or ADD_VALUES 3950 3951 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3952 3953 Level: advanced 3954 3955 .seealso: MatSetValuesBlocked() 3956 @*/ 3957 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3958 { 3959 /* convert input arguments to C version */ 3960 Mat mat = *matin; 3961 PetscInt m = *min, n = *nin; 3962 InsertMode addv = *addvin; 3963 3964 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3965 const MatScalar *value; 3966 MatScalar *barray=baij->barray; 3967 PetscBool roworiented = baij->roworiented; 3968 PetscErrorCode ierr; 3969 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3970 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3971 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3972 3973 PetscFunctionBegin; 3974 /* tasks normally handled by MatSetValuesBlocked() */ 3975 if (mat->insertmode == NOT_SET_VALUES) { 3976 mat->insertmode = addv; 3977 } 3978 #if defined(PETSC_USE_DEBUG) 3979 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3980 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3981 #endif 3982 if (mat->assembled) { 3983 mat->was_assembled = PETSC_TRUE; 3984 mat->assembled = PETSC_FALSE; 3985 } 3986 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3987 3988 3989 if(!barray) { 3990 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3991 baij->barray = barray; 3992 } 3993 3994 if (roworiented) { 3995 stepval = (n-1)*bs; 3996 } else { 3997 stepval = (m-1)*bs; 3998 } 3999 for (i=0; i<m; i++) { 4000 if (im[i] < 0) continue; 4001 #if defined(PETSC_USE_DEBUG) 4002 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4003 #endif 4004 if (im[i] >= rstart && im[i] < rend) { 4005 row = im[i] - rstart; 4006 for (j=0; j<n; j++) { 4007 /* If NumCol = 1 then a copy is not required */ 4008 if ((roworiented) && (n == 1)) { 4009 barray = (MatScalar*)v + i*bs2; 4010 } else if((!roworiented) && (m == 1)) { 4011 barray = (MatScalar*)v + j*bs2; 4012 } else { /* Here a copy is required */ 4013 if (roworiented) { 4014 value = v + i*(stepval+bs)*bs + j*bs; 4015 } else { 4016 value = v + j*(stepval+bs)*bs + i*bs; 4017 } 4018 for (ii=0; ii<bs; ii++,value+=stepval) { 4019 for (jj=0; jj<bs; jj++) { 4020 *barray++ = *value++; 4021 } 4022 } 4023 barray -=bs2; 4024 } 4025 4026 if (in[j] >= cstart && in[j] < cend){ 4027 col = in[j] - cstart; 4028 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4029 } 4030 else if (in[j] < 0) continue; 4031 #if defined(PETSC_USE_DEBUG) 4032 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4033 #endif 4034 else { 4035 if (mat->was_assembled) { 4036 if (!baij->colmap) { 4037 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4038 } 4039 4040 #if defined(PETSC_USE_DEBUG) 4041 #if defined (PETSC_USE_CTABLE) 4042 { PetscInt data; 4043 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4044 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4045 } 4046 #else 4047 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4048 #endif 4049 #endif 4050 #if defined (PETSC_USE_CTABLE) 4051 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4052 col = (col - 1)/bs; 4053 #else 4054 col = (baij->colmap[in[j]] - 1)/bs; 4055 #endif 4056 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4057 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4058 col = in[j]; 4059 } 4060 } 4061 else col = in[j]; 4062 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4063 } 4064 } 4065 } else { 4066 if (!baij->donotstash) { 4067 if (roworiented) { 4068 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4069 } else { 4070 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4071 } 4072 } 4073 } 4074 } 4075 4076 /* task normally handled by MatSetValuesBlocked() */ 4077 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4078 PetscFunctionReturn(0); 4079 } 4080 4081 #undef __FUNCT__ 4082 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4083 /*@ 4084 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4085 CSR format the local rows. 4086 4087 Collective on MPI_Comm 4088 4089 Input Parameters: 4090 + comm - MPI communicator 4091 . bs - the block size, only a block size of 1 is supported 4092 . m - number of local rows (Cannot be PETSC_DECIDE) 4093 . n - This value should be the same as the local size used in creating the 4094 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4095 calculated if N is given) For square matrices n is almost always m. 4096 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4097 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4098 . i - row indices 4099 . j - column indices 4100 - a - matrix values 4101 4102 Output Parameter: 4103 . mat - the matrix 4104 4105 Level: intermediate 4106 4107 Notes: 4108 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4109 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4110 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4111 4112 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4113 4114 .keywords: matrix, aij, compressed row, sparse, parallel 4115 4116 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4117 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4118 @*/ 4119 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4120 { 4121 PetscErrorCode ierr; 4122 4123 4124 PetscFunctionBegin; 4125 if (i[0]) { 4126 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4127 } 4128 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4129 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4130 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4131 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4132 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4133 PetscFunctionReturn(0); 4134 } 4135