#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
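/*
   For illustration (added note, not part of the original source): if this process's
   off-diagonal block columns are garray = {2,5,9} and bs = 3, then the routine below
   records colmap[2] = 1, colmap[5] = 4, colmap[9] = 7 (i.e. i*bs+1), so a global block
   column g is found in B at local point column colmap[g]-1, while colmap[g] == 0 means
   the block column is not present locally.
*/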
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   Explanatory note (added): in the routine below v[] is treated as an m x n array of dense
   bs x bs blocks.  For row-oriented input stepval = (n-1)*bs, so bs+stepval is the stride
   between consecutive rows of the logically dense (m*bs) x (n*bs) array; block (i,j) then
   starts at v + (i*(stepval+bs)+j)*bs and is gathered into the contiguous scratch buffer
   baij->barray before being handed to MatSetValuesBlocked_SeqBAIJ().
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,ii,jj,row,col;
  PetscInt       rstart=baij->rstartbs;
  PetscInt       rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt       h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
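  /* (Added note) At this point every nonzero block of A and B has a table slot whose key
     encodes its global (block row, block column) and whose data pointer addresses the
     block's storage, so MatSetValues_MPIBAIJ_HT()/MatSetValuesBlocked_MPIBAIJ_HT() can
     write values without searching the row structure. */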
  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  count = 0;
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);
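  /* (Added note) The binary file written by this routine thus contains, in order: the
     4-entry header {MAT_FILE_CLASSID, M, N, total nz}, then the per-row nonzero counts,
     then the column indices of every row, and finally the numerical values; block rows
     are expanded into point rows, with the block size recorded as a -matload_block_size
     hint in the .info file at the end of the routine. */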
  /* load up the local column indices.  Include them for all rows, not just one for each block row,
     since process 0 does not have the information needed to generate them for each row from a
     block row.  This does require more communication, but still not more than the communication
     needed for the nonzero values */
  nzmax = nz; /* space a largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

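  /* (Added note) Clear the type name and the methods composed on this object so it can be
     rebuilt with a different type after the destroy. */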
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
info->memory; isend[4] += info->mallocs; 1560 if (flag == MAT_LOCAL) { 1561 info->nz_used = isend[0]; 1562 info->nz_allocated = isend[1]; 1563 info->nz_unneeded = isend[2]; 1564 info->memory = isend[3]; 1565 info->mallocs = isend[4]; 1566 } else if (flag == MAT_GLOBAL_MAX) { 1567 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1568 info->nz_used = irecv[0]; 1569 info->nz_allocated = irecv[1]; 1570 info->nz_unneeded = irecv[2]; 1571 info->memory = irecv[3]; 1572 info->mallocs = irecv[4]; 1573 } else if (flag == MAT_GLOBAL_SUM) { 1574 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1575 info->nz_used = irecv[0]; 1576 info->nz_allocated = irecv[1]; 1577 info->nz_unneeded = irecv[2]; 1578 info->memory = irecv[3]; 1579 info->mallocs = irecv[4]; 1580 } else { 1581 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1582 } 1583 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1584 info->fill_ratio_needed = 0; 1585 info->factor_mallocs = 0; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1592 { 1593 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 switch (op) { 1598 case MAT_NEW_NONZERO_LOCATIONS: 1599 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1600 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1601 case MAT_KEEP_NONZERO_PATTERN: 1602 case MAT_NEW_NONZERO_LOCATION_ERR: 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_ROW_ORIENTED: 1607 a->roworiented = flg; 1608 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1609 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1610 break; 1611 case MAT_NEW_DIAGONALS: 1612 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1613 break; 1614 case MAT_IGNORE_OFF_PROC_ENTRIES: 1615 a->donotstash = flg; 1616 break; 1617 case MAT_USE_HASH_TABLE: 1618 a->ht_flag = flg; 1619 break; 1620 case MAT_SYMMETRIC: 1621 case MAT_STRUCTURALLY_SYMMETRIC: 1622 case MAT_HERMITIAN: 1623 case MAT_SYMMETRY_ETERNAL: 1624 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1625 break; 1626 default: 1627 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1634 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1635 { 1636 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1637 Mat_SeqBAIJ *Aloc; 1638 Mat B; 1639 PetscErrorCode ierr; 1640 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1641 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1642 MatScalar *a; 1643 1644 PetscFunctionBegin; 1645 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1648 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1649 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1650 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1651 } else { 1652 B = *matout; 1653 } 1654 1655 /* copy over the A part */ 1656 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1657 ai = Aloc->i; aj =
Aloc->j; a = Aloc->a; 1658 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1659 1660 for (i=0; i<mbs; i++) { 1661 rvals[0] = bs*(baij->rstartbs + i); 1662 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1663 for (j=ai[i]; j<ai[i+1]; j++) { 1664 col = (baij->cstartbs+aj[j])*bs; 1665 for (k=0; k<bs; k++) { 1666 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1667 col++; a += bs; 1668 } 1669 } 1670 } 1671 /* copy over the B part */ 1672 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1674 for (i=0; i<mbs; i++) { 1675 rvals[0] = bs*(baij->rstartbs + i); 1676 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1677 for (j=ai[i]; j<ai[i+1]; j++) { 1678 col = baij->garray[aj[j]]*bs; 1679 for (k=0; k<bs; k++) { 1680 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1681 col++; a += bs; 1682 } 1683 } 1684 } 1685 ierr = PetscFree(rvals);CHKERRQ(ierr); 1686 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 1689 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1690 *matout = B; 1691 } else { 1692 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1699 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1700 { 1701 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1702 Mat a = baij->A,b = baij->B; 1703 PetscErrorCode ierr; 1704 PetscInt s1,s2,s3; 1705 1706 PetscFunctionBegin; 1707 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1708 if (rr) { 1709 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1710 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1711 /* Overlap communication with computation. 
*/ 1712 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1713 } 1714 if (ll) { 1715 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1716 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1717 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1718 } 1719 /* scale the diagonal block */ 1720 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1721 1722 if (rr) { 1723 /* Do a scatter end and then right scale the off-diagonal block */ 1724 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1726 } 1727 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1733 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1734 { 1735 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1736 PetscErrorCode ierr; 1737 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1738 PetscInt i,*owners = A->rmap->range; 1739 PetscInt *nprocs,j,idx,nsends,row; 1740 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1741 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1742 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1743 MPI_Comm comm = ((PetscObject)A)->comm; 1744 MPI_Request *send_waits,*recv_waits; 1745 MPI_Status recv_status,*send_status; 1746 #if defined(PETSC_DEBUG) 1747 PetscBool found = PETSC_FALSE; 1748 #endif 1749 1750 PetscFunctionBegin; 1751 /* first count number of contributors to each processor */ 1752 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1753 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1755 j = 0; 1756 for (i=0; i<N; i++) { 1757 if (lastidx > (idx = rows[i])) j = 0; 1758 lastidx = idx; 1759 for (; j<size; j++) { 1760 if (idx >= owners[j] && idx < owners[j+1]) { 1761 nprocs[2*j]++; 1762 nprocs[2*j+1] = 1; 1763 owner[i] = j; 1764 #if defined(PETSC_DEBUG) 1765 found = PETSC_TRUE; 1766 #endif 1767 break; 1768 } 1769 } 1770 #if defined(PETSC_DEBUG) 1771 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1772 found = PETSC_FALSE; 1773 #endif 1774 } 1775 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1776 1777 if (A->nooffproczerorows) { 1778 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1779 nrecvs = nsends; 1780 nmax = N; 1781 } else { 1782 /* inform other processors of number of messages and max length*/ 1783 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1784 } 1785 1786 /* post receives: */ 1787 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1788 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1789 for (i=0; i<nrecvs; i++) { 1790 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1791 } 1792 1793 /* do sends: 1794 1) starts[i] gives the starting index in svalues for stuff going to 1795 the ith processor 1796 */ 1797 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1798 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1799 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1800 starts[0] = 0; 1801 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1802 for (i=0; i<N; i++) { 1803 svalues[starts[owner[i]]++] = rows[i]; 1804 } 1805 1806 starts[0] = 0; 1807 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1808 count = 0; 1809 for (i=0; i<size; i++) { 1810 if (nprocs[2*i+1]) { 1811 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1812 } 1813 } 1814 ierr = PetscFree(starts);CHKERRQ(ierr); 1815 1816 base = owners[rank]; 1817 1818 /* wait on receives */ 1819 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1820 count = nrecvs; 1821 slen = 0; 1822 while (count) { 1823 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1824 /* unpack receives into our local space */ 1825 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1826 source[imdex] = recv_status.MPI_SOURCE; 1827 lens[imdex] = n; 1828 slen += n; 1829 count--; 1830 } 1831 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1832 1833 /* move the data into the send scatter */ 1834 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1835 count = 0; 1836 for (i=0; i<nrecvs; i++) { 1837 values = rvalues + i*nmax; 1838 for (j=0; j<lens[i]; j++) { 1839 lrows[count++] = values[j] - base; 1840 } 1841 } 1842 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1843 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1844 ierr = PetscFree(owner);CHKERRQ(ierr); 1845 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1846 1847 /* actually zap the local rows */ 1848 /* 1849 Zero the required rows. If the "diagonal block" of the matrix 1850 is square and the user wishes to set the diagonal we use separate 1851 code so that MatSetValues() is not called for each diagonal allocating 1852 new memory, thus calling lots of mallocs and slowing things down. 
1853 1854 */ 1855 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1856 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1857 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1858 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1859 } else if (diag != 0.0) { 1860 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1861 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1862 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1863 for (i=0; i<slen; i++) { 1864 row = lrows[i] + rstart_bs; 1865 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1866 } 1867 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1868 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1869 } else { 1870 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1871 } 1872 1873 ierr = PetscFree(lrows);CHKERRQ(ierr); 1874 1875 /* wait on sends */ 1876 if (nsends) { 1877 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1878 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1879 ierr = PetscFree(send_status);CHKERRQ(ierr); 1880 } 1881 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1882 ierr = PetscFree(svalues);CHKERRQ(ierr); 1883 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1889 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1890 { 1891 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1892 PetscErrorCode ierr; 1893 1894 PetscFunctionBegin; 1895 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatEqual_MPIBAIJ" 1903 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1904 { 1905 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1906 Mat a,b,c,d; 1907 PetscBool flg; 1908 PetscErrorCode ierr; 1909 1910 PetscFunctionBegin; 1911 a = matA->A; b = matA->B; 1912 c = matB->A; d = matB->B; 1913 1914 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1915 if (flg) { 1916 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1917 } 1918 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatCopy_MPIBAIJ" 1924 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1925 { 1926 PetscErrorCode ierr; 1927 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1928 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1929 1930 PetscFunctionBegin; 1931 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1932 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1933 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1934 } else { 1935 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1936 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1937 } 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1943 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1944 { 1945 PetscErrorCode ierr; 1946 1947 PetscFunctionBegin; 1948 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1954 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1955 { 1956 PetscErrorCode ierr; 1957 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1958 PetscBLASInt bnz,one=1; 1959 Mat_SeqBAIJ *x,*y; 1960 1961 PetscFunctionBegin; 1962 if (str == SAME_NONZERO_PATTERN) { 1963 PetscScalar alpha = a; 1964 x = (Mat_SeqBAIJ *)xx->A->data; 1965 y = (Mat_SeqBAIJ *)yy->A->data; 1966 bnz = PetscBLASIntCast(x->nz); 1967 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1968 x = (Mat_SeqBAIJ *)xx->B->data; 1969 y = (Mat_SeqBAIJ *)yy->B->data; 1970 bnz = PetscBLASIntCast(x->nz); 1971 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1972 } else { 1973 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1974 } 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1980 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1981 { 1982 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1983 PetscInt rbs,cbs; 1984 PetscErrorCode ierr; 1985 1986 PetscFunctionBegin; 1987 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1988 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1989 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1990 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1991 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1992 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1998 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1999 { 2000 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2001 PetscErrorCode ierr; 2002 2003 PetscFunctionBegin; 2004 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2005 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2011 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2012 { 2013 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2018 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2024 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2025 { 2026 PetscErrorCode ierr; 2027 IS iscol_local; 2028 PetscInt csize; 2029 2030 PetscFunctionBegin; 2031 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2032 if (call == MAT_REUSE_MATRIX) { 2033 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2034 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 2035 } else { 2036 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2037 } 2038 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2039 if (call == MAT_INITIAL_MATRIX) { 2040 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2041 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 2042 } 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2048 /* 2049 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2050 in local and then by concatenating the local matrices the end result. 2051 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2052 */ 2053 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2054 { 2055 PetscErrorCode ierr; 2056 PetscMPIInt rank,size; 2057 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2058 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2059 Mat *local,M,Mreuse; 2060 MatScalar *vwork,*aa; 2061 MPI_Comm comm = ((PetscObject)mat)->comm; 2062 Mat_SeqBAIJ *aij; 2063 2064 2065 PetscFunctionBegin; 2066 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2067 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2068 2069 if (call == MAT_REUSE_MATRIX) { 2070 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2071 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2072 local = &Mreuse; 2073 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2074 } else { 2075 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2076 Mreuse = *local; 2077 ierr = PetscFree(local);CHKERRQ(ierr); 2078 } 2079 2080 /* 2081 m - number of local rows 2082 n - number of columns (same on all processors) 2083 rstart - first row in new global matrix generated 2084 */ 2085 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2086 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2087 m = m/bs; 2088 n = n/bs; 2089 2090 if (call == MAT_INITIAL_MATRIX) { 2091 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2092 ii = aij->i; 2093 jj = aij->j; 2094 2095 /* 2096 Determine the number of non-zeros in the diagonal and off-diagonal 2097 portions of the matrix in order to do correct preallocation 2098 */ 2099 2100 /* first get start and end of "diagonal" columns */ 2101 if (csize == PETSC_DECIDE) { 2102 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2103 if (mglobal == n*bs) { /* square matrix */ 2104 nlocal = m; 2105 } else { 2106 nlocal = n/size + ((n % size) > rank); 2107 } 2108 } else { 2109 nlocal = csize/bs; 2110 } 2111 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2112 rstart = rend - nlocal; 2113 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2114 2115 /* next, compute all the lengths */ 2116 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2117 olens = dlens + m; 2118 for (i=0; i<m; i++) { 2119 jend = ii[i+1] - ii[i]; 2120 olen = 0; 2121 dlen = 0; 2122 for (j=0; j<jend; j++) { 2123 if (*jj < rstart || *jj >= rend) olen++; 2124 else dlen++; 2125 jj++; 2126 } 2127 olens[i] = olen; 2128 dlens[i] = dlen; 2129 } 2130 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2131 ierr = 
MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2132 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2133 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2134 ierr = PetscFree(dlens);CHKERRQ(ierr); 2135 } else { 2136 PetscInt ml,nl; 2137 2138 M = *newmat; 2139 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2140 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2141 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2142 /* 2143 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2144 rather than the slower MatSetValues(). 2145 */ 2146 M->was_assembled = PETSC_TRUE; 2147 M->assembled = PETSC_FALSE; 2148 } 2149 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2150 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2151 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2152 ii = aij->i; 2153 jj = aij->j; 2154 aa = aij->a; 2155 for (i=0; i<m; i++) { 2156 row = rstart/bs + i; 2157 nz = ii[i+1] - ii[i]; 2158 cwork = jj; jj += nz; 2159 vwork = aa; aa += nz; 2160 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2161 } 2162 2163 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 *newmat = M; 2166 2167 /* save the submatrix used in this processor for the next request */ 2168 if (call == MAT_INITIAL_MATRIX) { 2169 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2170 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2171 } 2172 2173 PetscFunctionReturn(0); 2174 } 2175 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatPermute_MPIBAIJ" 2178 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2179 { 2180 MPI_Comm comm,pcomm; 2181 PetscInt first,local_size,nrows; 2182 const PetscInt *rows; 2183 PetscMPIInt size; 2184 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2185 PetscErrorCode ierr; 2186 2187 PetscFunctionBegin; 2188 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2189 /* make a collective version of 'rowp' */ 2190 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2191 if (pcomm==comm) { 2192 crowp = rowp; 2193 } else { 2194 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2195 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2196 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2197 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2198 } 2199 /* collect the global row permutation and invert it */ 2200 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2201 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2202 if (pcomm!=comm) { 2203 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2204 } 2205 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2206 /* get the local target indices */ 2207 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2208 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2209 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2210 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2211 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2212 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2213 /* the column permutation is so much easier; 2214 make a local version of 'colp' and invert it */ 2215 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2216 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2217 if (size==1) { 2218 lcolp
= colp; 2219 } else { 2220 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2221 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2222 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2223 } 2224 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2225 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2226 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2227 if (size>1) { 2228 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2229 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2230 } 2231 /* now we just get the submatrix */ 2232 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2233 /* clean up */ 2234 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2235 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2236 PetscFunctionReturn(0); 2237 } 2238 2239 #undef __FUNCT__ 2240 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2241 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2242 { 2243 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2244 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2245 2246 PetscFunctionBegin; 2247 if (nghosts) { *nghosts = B->nbs;} 2248 if (ghosts) {*ghosts = baij->garray;} 2249 PetscFunctionReturn(0); 2250 } 2251 2252 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2256 /* 2257 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2258 */ 2259 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2260 { 2261 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2262 PetscErrorCode ierr; 2263 PetscMPIInt size,*ncolsonproc,*disp,nn; 2264 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2265 const PetscInt *is; 2266 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2267 PetscInt *rowhit,M,cstart,cend,colb; 2268 PetscInt *columnsforrow,l; 2269 IS *isa; 2270 PetscBool done,flg; 2271 ISLocalToGlobalMapping map = mat->bmapping; 2272 PetscInt *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2273 2274 PetscFunctionBegin; 2275 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2276 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2277 2278 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2279 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2280 M = mat->rmap->n/bs; 2281 cstart = mat->cmap->rstart/bs; 2282 cend = mat->cmap->rend/bs; 2283 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2284 c->N = mat->cmap->N/bs; 2285 c->m = mat->rmap->n/bs; 2286 c->rstart = mat->rmap->rstart/bs; 2287 2288 c->ncolors = nis; 2289 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2290 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2291 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2292 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2293 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2294 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2295 2296 /* Allow access to data structures of local part of matrix */ 2297 if (!baij->colmap) { 2298 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2299 } 2300 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2301 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2302 2303 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2304 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2305 2306 for (i=0; i<nis; i++) { 2307 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2308 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2309 c->ncolumns[i] = n; 2310 if (n) { 2311 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2312 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2313 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2314 } else { 2315 c->columns[i] = 0; 2316 } 2317 2318 if (ctype == IS_COLORING_GLOBAL){ 2319 /* Determine the total (parallel) number of columns of this color */ 2320 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2321 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2322 2323 nn = PetscMPIIntCast(n); 2324 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2325 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2326 if (!nctot) { 2327 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2328 } 2329 2330 disp[0] = 0; 2331 for (j=1; j<size; j++) { 2332 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2333 } 2334 2335 /* Get complete list of columns for color on each processor */ 2336 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2337 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2338 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2339 } else if (ctype == IS_COLORING_GHOSTED){ 2340 /* Determine local number of columns of this color on this process, including ghost points */ 2341 nctot = n; 2342 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2343 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2344 } else { 2345 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2346 } 2347 2348 /* 2349 Mark all rows affected by these columns 2350 */ 2351 /* Temporary option to allow for debugging/testing */ 2352 flg = PETSC_FALSE; 2353 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2354 if (!flg) {/*-----------------------------------------------------------------------------*/ 2355 /* crude, fast version */ 2356 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2357 /* loop over columns*/ 2358 for (j=0; j<nctot; j++) { 2359 if (ctype == IS_COLORING_GHOSTED) { 2360 col = ltog[cols[j]]; 2361 } else { 2362 col = cols[j]; 2363 } 2364 if (col >= cstart && col < cend) { 2365 /* column is in diagonal block of matrix */ 2366 rows = A_cj + A_ci[col-cstart]; 2367 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2368 } else { 2369 #if defined (PETSC_USE_CTABLE) 2370 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2371 colb --; 2372 #else 2373 colb = baij->colmap[col] - 1; 2374 #endif 2375 if (colb == -1) { 2376 m = 0; 2377 } else { 2378 colb = colb/bs; 2379 rows = B_cj + B_ci[colb]; 2380 m = B_ci[colb+1] - B_ci[colb]; 2381 } 2382 } 2383 /* loop over the rows in this column, marking them in rowhit */ 2384 for (k=0; k<m; k++) { 2385 rowhit[*rows++] = col + 1; 2386 } 2387 } 2388 2389 /* count the number of hits */ 2390 nrows = 0; 2391 for (j=0; j<M; j++) { 2392 if (rowhit[j]) nrows++; 2393 } 2394 c->nrows[i] = nrows; 2395 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2396 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2397 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2398 nrows = 0; 2399 for (j=0; j<M; j++) { 2400 if (rowhit[j]) { 2401 c->rows[i][nrows] = j; 2402 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2403 nrows++; 2404 } 2405 } 2406 } else {/*-------------------------------------------------------------------------------*/ 2407 /* slow version, using rowhit as a linked list */ 2408 PetscInt currentcol,fm,mfm; 2409 rowhit[M] = M; 2410 nrows = 0; 2411 /* loop over columns*/ 2412 for (j=0; j<nctot; j++) { 2413 if (ctype == IS_COLORING_GHOSTED) { 2414 col = ltog[cols[j]]; 2415 } else { 2416 col = cols[j]; 2417 } 2418 if (col >= cstart && col < cend) { 2419 /* column is in diagonal block of matrix */ 2420 rows = A_cj + A_ci[col-cstart]; 2421 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2422 } else { 2423 #if defined (PETSC_USE_CTABLE) 2424 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2425 colb --; 2426 #else 2427 colb = baij->colmap[col] - 1; 2428 #endif 2429 if (colb == -1) { 2430 m = 0; 2431 } else { 2432 colb = colb/bs; 2433 rows = B_cj + B_ci[colb]; 2434 m = B_ci[colb+1] - B_ci[colb]; 2435 } 2436 } 2437 2438 /* loop over the rows in this column, marking them in rowhit */ 2439 fm = M; /* fm points to first entry in linked list */ 2440 for (k=0; k<m; k++) { 2441 currentcol = *rows++; 2442 /* is it already in the list?
*/ 2443 do { 2444 mfm = fm; 2445 fm = rowhit[fm]; 2446 } while (fm < currentcol); 2447 /* not in list so add it */ 2448 if (fm != currentcol) { 2449 nrows++; 2450 columnsforrow[currentcol] = col; 2451 /* next three lines insert new entry into linked list */ 2452 rowhit[mfm] = currentcol; 2453 rowhit[currentcol] = fm; 2454 fm = currentcol; 2455 /* fm points to present position in list since we know the columns are sorted */ 2456 } else { 2457 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2458 } 2459 } 2460 } 2461 c->nrows[i] = nrows; 2462 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2463 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2464 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2465 /* now store the linked list of rows into c->rows[i] */ 2466 nrows = 0; 2467 fm = rowhit[M]; 2468 do { 2469 c->rows[i][nrows] = fm; 2470 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2471 fm = rowhit[fm]; 2472 } while (fm < M); 2473 } /* ---------------------------------------------------------------------------------------*/ 2474 ierr = PetscFree(cols);CHKERRQ(ierr); 2475 } 2476 2477 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2478 /* 2479 vscale will contain the "diagonal" on processor scalings followed by the off processor 2480 */ 2481 if (ctype == IS_COLORING_GLOBAL) { 2482 PetscInt *garray; 2483 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2484 for (i=0; i<baij->B->cmap->n/bs; i++) { 2485 for (j=0; j<bs; j++) { 2486 garray[i*bs+j] = bs*baij->garray[i]+j; 2487 } 2488 } 2489 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2490 ierr = PetscFree(garray);CHKERRQ(ierr); 2491 CHKMEMQ; 2492 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2493 for (k=0; k<c->ncolors; k++) { 2494 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2495 for (l=0; l<c->nrows[k]; l++) { 2496 col = c->columnsforrow[k][l]; 2497 if (col >= cstart && col < cend) { 2498 /* column is in diagonal block of matrix */ 2499 colb = col - cstart; 2500 } else { 2501 /* column is in "off-processor" part */ 2502 #if defined (PETSC_USE_CTABLE) 2503 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2504 colb --; 2505 #else 2506 colb = baij->colmap[col] - 1; 2507 #endif 2508 colb = colb/bs; 2509 colb += cend - cstart; 2510 } 2511 c->vscaleforrow[k][l] = colb; 2512 } 2513 } 2514 } else if (ctype == IS_COLORING_GHOSTED) { 2515 /* Get gtol mapping */ 2516 PetscInt N = mat->cmap->N, *gtol; 2517 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2518 for (i=0; i<N; i++) gtol[i] = -1; 2519 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2520 2521 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2522 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2523 for (k=0; k<c->ncolors; k++) { 2524 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2525 for (l=0; l<c->nrows[k]; l++) { 2526 col = c->columnsforrow[k][l]; /* global column index */ 2527 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2528 } 2529 } 2530 ierr = PetscFree(gtol);CHKERRQ(ierr); 2531 } 2532 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2533 2534 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2535 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2536
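/* return access to the column index structures of the diagonal and off-diagonal blocks obtained above with MatGetColumnIJ() */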
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2537 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2538 CHKMEMQ; 2539 PetscFunctionReturn(0); 2540 } 2541 2542 #undef __FUNCT__ 2543 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2544 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2545 { 2546 Mat B; 2547 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2548 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2549 Mat_SeqAIJ *b; 2550 PetscErrorCode ierr; 2551 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2552 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2553 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2554 2555 PetscFunctionBegin; 2556 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2557 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2558 2559 /* ---------------------------------------------------------------- 2560 Tell every processor the number of nonzeros per row 2561 */ 2562 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2563 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2564 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2565 } 2566 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2567 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2568 displs = recvcounts + size; 2569 for (i=0; i<size; i++) { 2570 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2571 displs[i] = A->rmap->range[i]/bs; 2572 } 2573 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2574 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2575 #else 2576 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2577 #endif 2578 /* --------------------------------------------------------------- 2579 Create the sequential matrix of the same type as the local block diagonal 2580 */ 2581 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2582 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2583 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2584 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2585 b = (Mat_SeqAIJ *)B->data; 2586 2587 /*-------------------------------------------------------------------- 2588 Copy my part of matrix column indices over 2589 */ 2590 sendcount = ad->nz + bd->nz; 2591 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2592 a_jsendbuf = ad->j; 2593 b_jsendbuf = bd->j; 2594 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2595 cnt = 0; 2596 for (i=0; i<n; i++) { 2597 2598 /* put in lower diagonal portion */ 2599 m = bd->i[i+1] - bd->i[i]; 2600 while (m > 0) { 2601 /* is it above diagonal (in bd (compressed) numbering) */ 2602 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2603 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2604 m--; 2605 } 2606 2607 /* put in diagonal portion */ 2608 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2609 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2610 } 2611 2612 /* put in upper diagonal portion */ 2613 while (m-- > 0) { 2614 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2615 } 2616 } 2617 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2618 2619 /*-------------------------------------------------------------------- 2620 Gather all column indices to all processors 2621 */ 2622 for (i=0; i<size; i++) { 2623 recvcounts[i] = 0; 2624 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2625 recvcounts[i] += lens[j]; 2626 } 2627 } 2628 displs[0] = 0; 2629 for (i=1; i<size; i++) { 2630 displs[i] = displs[i-1] + recvcounts[i-1]; 2631 } 2632 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2633 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2634 #else 2635 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2636 #endif 2637 /*-------------------------------------------------------------------- 2638 Assemble the matrix into useable form (note numerical values not yet set) 2639 */ 2640 /* set the b->ilen (length of each row) values */ 2641 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2642 /* set the b->i indices */ 2643 b->i[0] = 0; 2644 for (i=1; i<=A->rmap->N/bs; i++) { 2645 b->i[i] = b->i[i-1] + lens[i-1]; 2646 } 2647 ierr = PetscFree(lens);CHKERRQ(ierr); 2648 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2651 2652 if (A->symmetric){ 2653 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2654 } else if (A->hermitian) { 2655 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2656 } else if (A->structurally_symmetric) { 2657 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2658 } 2659 *newmat = B; 2660 PetscFunctionReturn(0); 2661 } 2662 2663 #undef __FUNCT__ 2664 #define __FUNCT__ "MatSOR_MPIBAIJ" 2665 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2666 { 2667 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2668 PetscErrorCode ierr; 2669 Vec bb1 = 0; 2670 2671 PetscFunctionBegin; 2672 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2673 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2674 } 2675 2676 if (flag == SOR_APPLY_UPPER) { 2677 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2678 PetscFunctionReturn(0); 2679 } 2680 2681 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2682 if (flag & SOR_ZERO_INITIAL_GUESS) { 2683 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2684 its--; 2685 } 2686 2687 while (its--) { 2688 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2689 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2690 2691 /* update rhs: bb1 = bb - B*x */ 2692 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2693 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2694 2695 /* local sweep */ 2696 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2697 } 2698 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2699 if (flag & SOR_ZERO_INITIAL_GUESS) { 2700 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2701 its--; 2702 } 2703 while (its--) { 2704 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2705 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2706 2707 /* update rhs: bb1 = bb - B*x */ 2708 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2709 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2710 2711 /* local sweep */ 2712 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2713 } 2714 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2715 if (flag & SOR_ZERO_INITIAL_GUESS) { 2716 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2717 its--; 2718 } 2719 while (its--) { 2720 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2721 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2722 2723 /* update rhs: bb1 = bb - B*x */ 2724 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2725 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2726 2727 /* local sweep */ 2728 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2729 } 2730 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2731 2732 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2733 PetscFunctionReturn(0); 2734 } 2735 2736 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2737 2738 2739 /* -------------------------------------------------------------------*/ 2740 static struct _MatOps MatOps_Values = { 2741 MatSetValues_MPIBAIJ, 2742 MatGetRow_MPIBAIJ, 2743 MatRestoreRow_MPIBAIJ, 2744 MatMult_MPIBAIJ, 2745 /* 4*/ MatMultAdd_MPIBAIJ, 2746 MatMultTranspose_MPIBAIJ, 2747 MatMultTransposeAdd_MPIBAIJ, 2748 0, 2749 0, 2750 0, 2751 /*10*/ 0, 2752 0, 2753 0, 2754 MatSOR_MPIBAIJ, 2755 MatTranspose_MPIBAIJ, 2756 /*15*/ MatGetInfo_MPIBAIJ, 2757 MatEqual_MPIBAIJ, 2758 MatGetDiagonal_MPIBAIJ, 2759 MatDiagonalScale_MPIBAIJ, 2760 MatNorm_MPIBAIJ, 2761 /*20*/ MatAssemblyBegin_MPIBAIJ, 2762 MatAssemblyEnd_MPIBAIJ, 2763 MatSetOption_MPIBAIJ, 2764 MatZeroEntries_MPIBAIJ, 2765 /*24*/ MatZeroRows_MPIBAIJ, 2766 0, 2767 0, 2768 0, 2769 0, 2770 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2771 0, 2772 0, 2773 0, 2774 0, 2775 /*34*/ MatDuplicate_MPIBAIJ, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*39*/ MatAXPY_MPIBAIJ, 2781 MatGetSubMatrices_MPIBAIJ, 2782 MatIncreaseOverlap_MPIBAIJ, 2783 MatGetValues_MPIBAIJ, 2784 MatCopy_MPIBAIJ, 2785 /*44*/ 0, 2786 MatScale_MPIBAIJ, 2787 0, 2788 0, 2789 0, 2790 /*49*/ MatSetBlockSize_MPIBAIJ, 2791 0, 2792 0, 2793 0, 2794 0, 2795 /*54*/ MatFDColoringCreate_MPIBAIJ, 2796 0, 2797 MatSetUnfactored_MPIBAIJ, 2798 MatPermute_MPIBAIJ, 2799 MatSetValuesBlocked_MPIBAIJ, 2800 /*59*/ MatGetSubMatrix_MPIBAIJ, 2801 MatDestroy_MPIBAIJ, 2802 MatView_MPIBAIJ, 2803 0, 2804 0, 2805 /*64*/ 0, 2806 0, 2807 0, 2808 0, 2809 0, 2810 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2811 0, 2812 0, 2813 0, 2814 0, 2815 /*74*/ 0, 2816 MatFDColoringApply_BAIJ, 2817 0, 2818 0, 2819 0, 2820 /*79*/ 0, 2821 0, 2822 0, 2823 0, 2824 MatLoad_MPIBAIJ, 2825 /*84*/ 0, 2826 0, 2827 0, 2828 0, 2829 0, 2830 /*89*/ 0, 2831 0, 2832 0, 2833 0, 2834 0, 2835 /*94*/ 0, 2836 0, 2837 0, 2838 0, 2839 0, 2840 /*99*/ 0, 2841 0, 2842 0, 2843 0, 2844 0, 2845 /*104*/0, 2846 MatRealPart_MPIBAIJ, 2847 MatImaginaryPart_MPIBAIJ, 2848 0, 2849 0, 2850 /*109*/0, 2851 0, 2852 0, 2853 0, 2854 0, 2855 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2856 0, 2857 MatGetGhosts_MPIBAIJ, 2858 0, 2859 0, 2860 /*119*/0, 2861 0, 2862 0, 2863 0 
2864 }; 2865 2866 EXTERN_C_BEGIN 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2869 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2870 { 2871 PetscFunctionBegin; 2872 *a = ((Mat_MPIBAIJ *)A->data)->A; 2873 *iscopy = PETSC_FALSE; 2874 PetscFunctionReturn(0); 2875 } 2876 EXTERN_C_END 2877 2878 EXTERN_C_BEGIN 2879 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2880 EXTERN_C_END 2881 2882 EXTERN_C_BEGIN 2883 #undef __FUNCT__ 2884 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2885 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2886 { 2887 PetscInt m,rstart,cstart,cend; 2888 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2889 const PetscInt *JJ=0; 2890 PetscScalar *values=0; 2891 PetscErrorCode ierr; 2892 2893 PetscFunctionBegin; 2894 2895 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2896 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2897 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2898 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2899 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2900 m = B->rmap->n/bs; 2901 rstart = B->rmap->rstart/bs; 2902 cstart = B->cmap->rstart/bs; 2903 cend = B->cmap->rend/bs; 2904 2905 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2906 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2907 for (i=0; i<m; i++) { 2908 nz = ii[i+1] - ii[i]; 2909 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2910 nz_max = PetscMax(nz_max,nz); 2911 JJ = jj + ii[i]; 2912 for (j=0; j<nz; j++) { 2913 if (*JJ >= cstart) break; 2914 JJ++; 2915 } 2916 d = 0; 2917 for (; j<nz; j++) { 2918 if (*JJ++ >= cend) break; 2919 d++; 2920 } 2921 d_nnz[i] = d; 2922 o_nnz[i] = nz - d; 2923 } 2924 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2925 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2926 2927 values = (PetscScalar*)V; 2928 if (!values) { 2929 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2930 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2931 } 2932 for (i=0; i<m; i++) { 2933 PetscInt row = i + rstart; 2934 PetscInt ncols = ii[i+1] - ii[i]; 2935 const PetscInt *icols = jj + ii[i]; 2936 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2937 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2938 } 2939 2940 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2941 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2942 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2943 2944 PetscFunctionReturn(0); 2945 } 2946 EXTERN_C_END 2947 2948 #undef __FUNCT__ 2949 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2950 /*@C 2951 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2952 (the default parallel PETSc format). 2953 2954 Collective on MPI_Comm 2955 2956 Input Parameters: 2957 + A - the matrix 2958 . bs - the block size 2959 . i - the indices into j for the start of each local row (starts with zero) 2960 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2961 - v - optional values in the matrix 2962 2963 Level: developer 2964 2965 .keywords: matrix, aij, compressed row, sparse, parallel 2966 2967 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2968 @*/ 2969 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2970 { 2971 PetscErrorCode ierr; 2972 2973 PetscFunctionBegin; 2974 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2975 PetscFunctionReturn(0); 2976 } 2977 2978 EXTERN_C_BEGIN 2979 #undef __FUNCT__ 2980 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2981 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2982 { 2983 Mat_MPIBAIJ *b; 2984 PetscErrorCode ierr; 2985 PetscInt i, newbs = PetscAbs(bs); 2986 2987 PetscFunctionBegin; 2988 if (bs < 0) { 2989 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2990 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2991 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2992 bs = PetscAbs(bs); 2993 } 2994 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2995 bs = newbs; 2996 2997 2998 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2999 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3000 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3001 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3002 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3003 3004 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3005 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3006 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3007 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3008 3009 if (d_nnz) { 3010 for (i=0; i<B->rmap->n/bs; i++) { 3011 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3012 } 3013 } 3014 if (o_nnz) { 3015 for (i=0; i<B->rmap->n/bs; i++) { 3016 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3017 } 3018 } 3019 3020 b = (Mat_MPIBAIJ*)B->data; 3021 b->bs2 = bs*bs; 3022 b->mbs = B->rmap->n/bs; 3023 b->nbs = B->cmap->n/bs; 3024 b->Mbs = B->rmap->N/bs; 3025 b->Nbs = B->cmap->N/bs; 3026 3027 for (i=0; i<=b->size; i++) { 3028 b->rangebs[i] = B->rmap->range[i]/bs; 3029 } 3030 b->rstartbs = B->rmap->rstart/bs; 3031 b->rendbs = B->rmap->rend/bs; 3032 b->cstartbs = B->cmap->rstart/bs; 3033 b->cendbs = B->cmap->rend/bs; 3034 3035 if (!B->preallocated) { 3036 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3037 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3038 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3039 ierr = 
PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3040 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3041 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3042 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3043 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3044 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3045 } 3046 3047 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3048 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3049 B->preallocated = PETSC_TRUE; 3050 PetscFunctionReturn(0); 3051 } 3052 EXTERN_C_END 3053 3054 EXTERN_C_BEGIN 3055 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3056 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3057 EXTERN_C_END 3058 3059 3060 EXTERN_C_BEGIN 3061 #undef __FUNCT__ 3062 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3063 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3064 { 3065 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3066 PetscErrorCode ierr; 3067 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3068 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3069 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3070 3071 PetscFunctionBegin; 3072 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3073 ii[0] = 0; 3074 CHKMEMQ; 3075 for (i=0; i<M; i++) { 3076 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3077 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3078 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3079 /* remove one from the count if this row of the matrix has a diagonal entry */ 3080 for (j=id[i]; j<id[i+1]; j++) { 3081 if (jd[j] == i) {ii[i+1]--;break;} 3082 } 3083 CHKMEMQ; 3084 } 3085 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3086 cnt = 0; 3087 for (i=0; i<M; i++) { 3088 for (j=io[i]; j<io[i+1]; j++) { 3089 if (garray[jo[j]] > rstart) break; 3090 jj[cnt++] = garray[jo[j]]; 3091 CHKMEMQ; 3092 } 3093 for (k=id[i]; k<id[i+1]; k++) { 3094 if (jd[k] != i) { 3095 jj[cnt++] = rstart + jd[k]; 3096 CHKMEMQ; 3097 } 3098 } 3099 for (;j<io[i+1]; j++) { 3100 jj[cnt++] = garray[jo[j]]; 3101 CHKMEMQ; 3102 } 3103 } 3104 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3105 PetscFunctionReturn(0); 3106 } 3107 EXTERN_C_END 3108 3109 EXTERN_C_BEGIN 3110 #if defined(PETSC_HAVE_MUMPS) 3111 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3112 #endif 3113 EXTERN_C_END 3114 3115 /*MC 3116 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3117 3118 Options Database Keys: 3119 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3120 .
-mat_block_size <bs> - set the blocksize used to store the matrix 3121 - -mat_use_hash_table <fact> 3122 3123 Level: beginner 3124 3125 .seealso: MatCreateMPIBAIJ 3126 M*/ 3127 3128 EXTERN_C_BEGIN 3129 #undef __FUNCT__ 3130 #define __FUNCT__ "MatCreate_MPIBAIJ" 3131 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 3132 { 3133 Mat_MPIBAIJ *b; 3134 PetscErrorCode ierr; 3135 PetscBool flg; 3136 3137 PetscFunctionBegin; 3138 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3139 B->data = (void*)b; 3140 3141 3142 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3143 B->mapping = 0; 3144 B->assembled = PETSC_FALSE; 3145 3146 B->insertmode = NOT_SET_VALUES; 3147 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3148 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3149 3150 /* build local table of row and column ownerships */ 3151 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3152 3153 /* build cache for off array entries formed */ 3154 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3155 b->donotstash = PETSC_FALSE; 3156 b->colmap = PETSC_NULL; 3157 b->garray = PETSC_NULL; 3158 b->roworiented = PETSC_TRUE; 3159 3160 /* stuff used in block assembly */ 3161 b->barray = 0; 3162 3163 /* stuff used for matrix vector multiply */ 3164 b->lvec = 0; 3165 b->Mvctx = 0; 3166 3167 /* stuff for MatGetRow() */ 3168 b->rowindices = 0; 3169 b->rowvalues = 0; 3170 b->getrowactive = PETSC_FALSE; 3171 3172 /* hash table stuff */ 3173 b->ht = 0; 3174 b->hd = 0; 3175 b->ht_size = 0; 3176 b->ht_flag = PETSC_FALSE; 3177 b->ht_fact = 0; 3178 b->ht_total_ct = 0; 3179 b->ht_insert_ct = 0; 3180 3181 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3182 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3183 if (flg) { 3184 PetscReal fact = 1.39; 3185 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3186 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3187 if (fact <= 1.0) fact = 1.39; 3188 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3189 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3190 } 3191 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3192 3193 #if defined(PETSC_HAVE_MUMPS) 3194 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3195 #endif 3196 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3197 "MatConvert_MPIBAIJ_MPIAdj", 3198 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3199 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3200 "MatStoreValues_MPIBAIJ", 3201 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3202 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3203 "MatRetrieveValues_MPIBAIJ", 3204 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3205 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3206 "MatGetDiagonalBlock_MPIBAIJ", 3207 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3208 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3209 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3210 
MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3211 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3212 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3213 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3214 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3215 "MatDiagonalScaleLocal_MPIBAIJ", 3216 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3217 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3218 "MatSetHashTableFactor_MPIBAIJ", 3219 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3220 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3221 PetscFunctionReturn(0); 3222 } 3223 EXTERN_C_END 3224 3225 /*MC 3226 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3227 3228 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3229 and MATMPIBAIJ otherwise. 3230 3231 Options Database Keys: 3232 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3233 3234 Level: beginner 3235 3236 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3237 M*/ 3238 3239 EXTERN_C_BEGIN 3240 #undef __FUNCT__ 3241 #define __FUNCT__ "MatCreate_BAIJ" 3242 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3243 { 3244 PetscErrorCode ierr; 3245 PetscMPIInt size; 3246 3247 PetscFunctionBegin; 3248 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3249 if (size == 1) { 3250 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3251 } else { 3252 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3253 } 3254 PetscFunctionReturn(0); 3255 } 3256 EXTERN_C_END 3257 3258 #undef __FUNCT__ 3259 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3260 /*@C 3261 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3262 (block compressed row). For good matrix assembly performance 3263 the user should preallocate the matrix storage by setting the parameters 3264 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3265 performance can be increased by more than a factor of 50. 3266 3267 Collective on Mat 3268 3269 Input Parameters: 3270 + A - the matrix 3271 . bs - size of block 3272 . d_nz - number of block nonzeros per block row in diagonal portion of local 3273 submatrix (same for all local rows) 3274 . d_nnz - array containing the number of block nonzeros in the various block rows 3275 in the diagonal portion of the local submatrix (possibly different for each block 3276 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3277 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3278 submatrix (same for all local rows). 3279 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3280 off-diagonal portion of the local submatrix (possibly different for 3281 each block row) or PETSC_NULL. 3282 3283 If the *_nnz parameter is given then the *_nz parameter is ignored. 3284 3285 Options Database Keys: 3286 + -mat_block_size - size of the blocks to use 3287 - -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> scales the size of the table 3288 3289 Notes: 3290 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3291 then it must be used on all processors that share the object for that argument.
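
   Example:
   A minimal sketch of a typical call sequence; comm and the sizes m, n, M, N stand for values
   supplied by the caller, and the block size and per-row block counts below are illustrative
   only, not values prescribed by this routine:
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   /* reserve 3 diagonal and 2 off-diagonal block nonzeros per block row, with 2x2 blocks */
   MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,2,PETSC_NULL);
   /* ... set entries with MatSetValuesBlocked()/MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve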
3292 3293 Storage Information: 3294 For a square global matrix we define each processor's diagonal portion 3295 to be its local rows and the corresponding columns (a square submatrix); 3296 each processor's off-diagonal portion encompasses the remainder of the 3297 local matrix (a rectangular submatrix). 3298 3299 The user can specify preallocated storage for the diagonal part of 3300 the local submatrix with either d_nz or d_nnz (not both). Set 3301 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3302 memory allocation. Likewise, specify preallocated storage for the 3303 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3304 3305 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3306 the figure below we depict these three local rows and all columns (0-11). 3307 3308 .vb 3309 0 1 2 3 4 5 6 7 8 9 10 11 3310 ------------------- 3311 row 3 | o o o d d d o o o o o o 3312 row 4 | o o o d d d o o o o o o 3313 row 5 | o o o d d d o o o o o o 3314 ------------------- 3315 .ve 3316 3317 Thus, any entries in the d locations are stored in the d (diagonal) 3318 submatrix, and any entries in the o locations are stored in the 3319 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3320 stored simply in the MATSEQBAIJ format for compressed row storage. 3321 3322 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3323 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3324 In general, for PDE problems in which most nonzeros are near the diagonal, 3325 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3326 or you will get TERRIBLE performance; see the users' manual chapter on 3327 matrices. 3328 3329 You can call MatGetInfo() to get information on how effective the preallocation was; 3330 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3331 You can also run with the option -info and look for messages with the string 3332 malloc in them to see if additional memory allocation was needed. 3333 3334 Level: intermediate 3335 3336 .keywords: matrix, block, aij, compressed row, sparse, parallel 3337 3338 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3339 @*/ 3340 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3341 { 3342 PetscErrorCode ierr; 3343 3344 PetscFunctionBegin; 3345 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3346 PetscFunctionReturn(0); 3347 } 3348 3349 #undef __FUNCT__ 3350 #define __FUNCT__ "MatCreateMPIBAIJ" 3351 /*@C 3352 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3353 (block compressed row). For good matrix assembly performance 3354 the user should preallocate the matrix storage by setting the parameters 3355 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3356 performance can be increased by more than a factor of 50. 3357 3358 Collective on MPI_Comm 3359 3360 Input Parameters: 3361 + comm - MPI communicator 3362 . bs - size of block 3363 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3364 This value should be the same as the local size used in creating the 3365 y vector for the matrix-vector product y = Ax. 3366 .
n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 3367 This value should be the same as the local size used in creating the 3368 x vector for the matrix-vector product y = Ax. 3369 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3370 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3371 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3372 submatrix (same for all local rows) 3373 . d_nnz - array containing the number of nonzero blocks in the various block rows 3374 in the diagonal portion of the local submatrix (possibly different for each block 3375 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3376 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3377 submatrix (same for all local rows). 3378 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3379 off-diagonal portion of the local submatrix (possibly different for 3380 each block row) or PETSC_NULL. 3381 3382 Output Parameter: 3383 . A - the matrix 3384 3385 Options Database Keys: 3386 + -mat_block_size - size of the blocks to use 3387 - -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> scales the size of the table 3388 3389 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3390 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3391 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3392 3393 Notes: 3394 If the *_nnz parameter is given then the *_nz parameter is ignored. 3395 3396 A nonzero block is any block that has 1 or more nonzeros in it. 3397 3398 The user MUST specify either the local or global matrix dimensions 3399 (possibly both). 3400 3401 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3402 then it must be used on all processors that share the object for that argument. 3403 3404 Storage Information: 3405 For a square global matrix we define each processor's diagonal portion 3406 to be its local rows and the corresponding columns (a square submatrix); 3407 each processor's off-diagonal portion encompasses the remainder of the 3408 local matrix (a rectangular submatrix). 3409 3410 The user can specify preallocated storage for the diagonal part of 3411 the local submatrix with either d_nz or d_nnz (not both). Set 3412 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3413 memory allocation. Likewise, specify preallocated storage for the 3414 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3415 3416 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3417 the figure below we depict these three local rows and all columns (0-11). 3418 3419 .vb 3420 0 1 2 3 4 5 6 7 8 9 10 11 3421 ------------------- 3422 row 3 | o o o d d d o o o o o o 3423 row 4 | o o o d d d o o o o o o 3424 row 5 | o o o d d d o o o o o o 3425 ------------------- 3426 .ve 3427 3428 Thus, any entries in the d locations are stored in the d (diagonal) 3429 submatrix, and any entries in the o locations are stored in the 3430 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3431 stored simply in the MATSEQBAIJ format for compressed row storage. 3432 3433 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3434 and o_nz should indicate the number of block nonzeros per row in the o matrix.
3435 In general, for PDE problems in which most nonzeros are near the diagonal, 3436 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3437 or you will get TERRIBLE performance; see the users' manual chapter on 3438 matrices. 3439 3440 Level: intermediate 3441 3442 .keywords: matrix, block, aij, compressed row, sparse, parallel 3443 3444 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3445 @*/ 3446 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3447 { 3448 PetscErrorCode ierr; 3449 PetscMPIInt size; 3450 3451 PetscFunctionBegin; 3452 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3453 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3454 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3455 if (size > 1) { 3456 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3457 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3458 } else { 3459 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3460 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3461 } 3462 PetscFunctionReturn(0); 3463 } 3464 3465 #undef __FUNCT__ 3466 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3467 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3468 { 3469 Mat mat; 3470 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3471 PetscErrorCode ierr; 3472 PetscInt len=0; 3473 3474 PetscFunctionBegin; 3475 *newmat = 0; 3476 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3477 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3478 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3479 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3480 3481 mat->factortype = matin->factortype; 3482 mat->preallocated = PETSC_TRUE; 3483 mat->assembled = PETSC_TRUE; 3484 mat->insertmode = NOT_SET_VALUES; 3485 3486 a = (Mat_MPIBAIJ*)mat->data; 3487 mat->rmap->bs = matin->rmap->bs; 3488 a->bs2 = oldmat->bs2; 3489 a->mbs = oldmat->mbs; 3490 a->nbs = oldmat->nbs; 3491 a->Mbs = oldmat->Mbs; 3492 a->Nbs = oldmat->Nbs; 3493 3494 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3495 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3496 3497 a->size = oldmat->size; 3498 a->rank = oldmat->rank; 3499 a->donotstash = oldmat->donotstash; 3500 a->roworiented = oldmat->roworiented; 3501 a->rowindices = 0; 3502 a->rowvalues = 0; 3503 a->getrowactive = PETSC_FALSE; 3504 a->barray = 0; 3505 a->rstartbs = oldmat->rstartbs; 3506 a->rendbs = oldmat->rendbs; 3507 a->cstartbs = oldmat->cstartbs; 3508 a->cendbs = oldmat->cendbs; 3509 3510 /* hash table stuff */ 3511 a->ht = 0; 3512 a->hd = 0; 3513 a->ht_size = 0; 3514 a->ht_flag = oldmat->ht_flag; 3515 a->ht_fact = oldmat->ht_fact; 3516 a->ht_total_ct = 0; 3517 a->ht_insert_ct = 0; 3518 3519 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3520 if (oldmat->colmap) { 3521 #if defined (PETSC_USE_CTABLE) 3522 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3523 #else 3524 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3525 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3526 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3527 #endif 3528 } else a->colmap = 0; 3529 3530 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3531 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3532 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3533 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3534 } else a->garray = 0; 3535 3536 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3537 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3538 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3539 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3540 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3541 3542 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3543 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3544 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3545 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3546 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3547 *newmat = mat; 3548 3549 PetscFunctionReturn(0); 3550 } 3551 3552 #undef __FUNCT__ 3553 #define __FUNCT__ "MatLoad_MPIBAIJ" 3554 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3555 { 3556 PetscErrorCode ierr; 3557 int fd; 3558 PetscInt i,nz,j,rstart,rend; 3559 PetscScalar *vals,*buf; 3560 MPI_Comm comm = ((PetscObject)viewer)->comm; 3561 MPI_Status status; 3562 PetscMPIInt rank,size,maxnz; 3563 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3564 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3565 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3566 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3567 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3568 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3569 3570 PetscFunctionBegin; 3571 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3572 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3573 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3574 3575 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3576 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3577 if (!rank) { 3578 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3579 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3580 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3581 } 3582 3583 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3584 3585 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3586 M = header[1]; N = header[2]; 3587 3588 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3589 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3590 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3591 3592 /* If global sizes are set, check if they are consistent with that given in the file */ 3593 if (sizesset) { 3594 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3595 } 3596 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and 
input matrix has (%d)",M,grows); 3597 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3598 3599 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3600 3601 /* 3602 This code adds extra rows to make sure the number of rows is 3603 divisible by the blocksize 3604 */ 3605 Mbs = M/bs; 3606 extra_rows = bs - M + bs*Mbs; 3607 if (extra_rows == bs) extra_rows = 0; 3608 else Mbs++; 3609 if (extra_rows && !rank) { 3610 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3611 } 3612 3613 /* determine ownership of all rows */ 3614 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3615 mbs = Mbs/size + ((Mbs % size) > rank); 3616 m = mbs*bs; 3617 } else { /* User set */ 3618 m = newmat->rmap->n; 3619 mbs = m/bs; 3620 } 3621 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3622 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3623 3624 /* process 0 needs enough room for process with most rows */ 3625 if (!rank) { 3626 mmax = rowners[1]; 3627 for (i=2; i<size; i++) { 3628 mmax = PetscMax(mmax,rowners[i]); 3629 } 3630 mmax*=bs; 3631 } else mmax = m; 3632 3633 rowners[0] = 0; 3634 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3635 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3636 rstart = rowners[rank]; 3637 rend = rowners[rank+1]; 3638 3639 /* distribute row lengths to all processors */ 3640 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3641 if (!rank) { 3642 mend = m; 3643 if (size == 1) mend = mend - extra_rows; 3644 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3645 for (j=mend; j<m; j++) locrowlens[j] = 1; 3646 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3647 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3648 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3649 for (j=0; j<m; j++) { 3650 procsnz[0] += locrowlens[j]; 3651 } 3652 for (i=1; i<size; i++) { 3653 mend = browners[i+1] - browners[i]; 3654 if (i == size-1) mend = mend - extra_rows; 3655 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3656 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3657 /* calculate the number of nonzeros on each processor */ 3658 for (j=0; j<browners[i+1]-browners[i]; j++) { 3659 procsnz[i] += rowlengths[j]; 3660 } 3661 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3662 } 3663 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3664 } else { 3665 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3666 } 3667 3668 if (!rank) { 3669 /* determine max buffer needed and allocate it */ 3670 maxnz = procsnz[0]; 3671 for (i=1; i<size; i++) { 3672 maxnz = PetscMax(maxnz,procsnz[i]); 3673 } 3674 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3675 3676 /* read in my part of the matrix column indices */ 3677 nz = procsnz[0]; 3678 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3679 mycols = ibuf; 3680 if (size == 1) nz -= extra_rows; 3681 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3682 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3683 3684 /* read in every ones (except the last) and ship off */ 3685 for (i=1; i<size-1; i++) { 3686 nz = procsnz[i]; 3687 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3688 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3689 } 3690 /* read in the stuff for the last proc */ 3691 if (size != 1) { 3692 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3693 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3694 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3695 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3696 } 3697 ierr = PetscFree(cols);CHKERRQ(ierr); 3698 } else { 3699 /* determine buffer space needed for message */ 3700 nz = 0; 3701 for (i=0; i<m; i++) { 3702 nz += locrowlens[i]; 3703 } 3704 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3705 mycols = ibuf; 3706 /* receive message of column indices*/ 3707 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3708 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3709 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3710 } 3711 3712 /* loop over local rows, determining number of off diagonal entries */ 3713 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3714 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3715 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3716 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3717 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3718 rowcount = 0; nzcount = 0; 3719 for (i=0; i<mbs; i++) { 3720 dcount = 0; 3721 odcount = 0; 3722 for (j=0; j<bs; j++) { 3723 kmax = locrowlens[rowcount]; 3724 for (k=0; k<kmax; k++) { 3725 tmp = mycols[nzcount++]/bs; 3726 if (!mask[tmp]) { 3727 mask[tmp] = 1; 3728 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3729 else masked1[dcount++] = tmp; 3730 } 3731 } 3732 rowcount++; 3733 } 3734 3735 dlens[i] = dcount; 3736 odlens[i] = odcount; 3737 3738 /* zero out the mask elements we set */ 3739 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3740 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3741 } 3742 3743 3744 if (!sizesset) { 3745 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3746 } 3747 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3748 3749 if (!rank) { 3750 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3751 /* read in my part of the matrix numerical values */ 3752 nz = procsnz[0]; 3753 vals = buf; 3754 mycols = ibuf; 3755 if (size == 1) nz -= extra_rows; 3756 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3757 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3758 3759 /* insert into matrix */ 3760 jj = rstart*bs; 3761 for (i=0; i<m; i++) { 3762 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3763 mycols += locrowlens[i]; 3764 vals += locrowlens[i]; 3765 jj++; 3766 } 3767 /* read in other processors (except the last one) and ship out */ 3768 for (i=1; i<size-1; i++) { 3769 nz = procsnz[i]; 3770 vals = buf; 3771 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3772 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3773 } 3774 /* the last proc */ 3775 if (size != 1){ 3776 nz = procsnz[i] - extra_rows; 3777 vals = buf; 3778 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3779 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3780 ierr = 
MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3781 } 3782 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3783 } else { 3784 /* receive numeric values */ 3785 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3786 3787 /* receive message of values */ 3788 vals = buf; 3789 mycols = ibuf; 3790 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3791 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3792 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3793 3794 /* insert into matrix */ 3795 jj = rstart*bs; 3796 for (i=0; i<m; i++) { 3797 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3798 mycols += locrowlens[i]; 3799 vals += locrowlens[i]; 3800 jj++; 3801 } 3802 } 3803 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3804 ierr = PetscFree(buf);CHKERRQ(ierr); 3805 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3806 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3807 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3808 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3809 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3810 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3811 3812 PetscFunctionReturn(0); 3813 } 3814 3815 #undef __FUNCT__ 3816 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3817 /*@ 3818 MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table. 3819 3820 Input Parameters: 3821 + mat - the matrix 3822 - fact - the factor used to scale the size of the hash table 3823 3824 Not Collective, each process can use a different factor 3825 3826 Level: advanced 3827 3828 Notes: 3829 This can also be set by the command line option: -mat_use_hash_table <fact> 3830 3831 .keywords: matrix, hashtable, factor, HT 3832 3833 .seealso: MatSetOption() 3834 @*/ 3835 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3836 { 3837 PetscErrorCode ierr; 3838 3839 PetscFunctionBegin; 3840 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3841 PetscFunctionReturn(0); 3842 } 3843 3844 EXTERN_C_BEGIN 3845 #undef __FUNCT__ 3846 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3847 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3848 { 3849 Mat_MPIBAIJ *baij; 3850 3851 PetscFunctionBegin; 3852 baij = (Mat_MPIBAIJ*)mat->data; 3853 baij->ht_fact = fact; 3854 PetscFunctionReturn(0); 3855 } 3856 EXTERN_C_END 3857 3858 #undef __FUNCT__ 3859 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3860 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3861 { 3862 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3863 PetscFunctionBegin; 3864 *Ad = a->A; 3865 *Ao = a->B; 3866 *colmap = a->garray; 3867 PetscFunctionReturn(0); 3868 } 3869 3870 /* 3871 Special version for direct calls from Fortran (to eliminate two function call overheads) 3872 */ 3873 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3874 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3875 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3876 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3877 #endif 3878 3879 #undef __FUNCT__ 3880 #define __FUNCT__ "matmpibaijsetvaluesblocked" 3881 /*@C 3882 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked() 3883 3884 Collective on Mat 3885 3886 Input Parameters: 3887 + mat - the matrix 3888
. min - number of input rows 3889 . im - input rows 3890 . nin - number of input columns 3891 . in - input columns 3892 . v - numerical values input 3893 - addvin - INSERT_VALUES or ADD_VALUES 3894 3895 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3896 3897 Level: advanced 3898 3899 .seealso: MatSetValuesBlocked() 3900 @*/ 3901 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3902 { 3903 /* convert input arguments to C version */ 3904 Mat mat = *matin; 3905 PetscInt m = *min, n = *nin; 3906 InsertMode addv = *addvin; 3907 3908 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3909 const MatScalar *value; 3910 MatScalar *barray=baij->barray; 3911 PetscBool roworiented = baij->roworiented; 3912 PetscErrorCode ierr; 3913 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3914 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3915 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3916 3917 PetscFunctionBegin; 3918 /* tasks normally handled by MatSetValuesBlocked() */ 3919 if (mat->insertmode == NOT_SET_VALUES) { 3920 mat->insertmode = addv; 3921 } 3922 #if defined(PETSC_USE_DEBUG) 3923 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3924 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3925 #endif 3926 if (mat->assembled) { 3927 mat->was_assembled = PETSC_TRUE; 3928 mat->assembled = PETSC_FALSE; 3929 } 3930 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3931 3932 3933 if(!barray) { 3934 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3935 baij->barray = barray; 3936 } 3937 3938 if (roworiented) { 3939 stepval = (n-1)*bs; 3940 } else { 3941 stepval = (m-1)*bs; 3942 } 3943 for (i=0; i<m; i++) { 3944 if (im[i] < 0) continue; 3945 #if defined(PETSC_USE_DEBUG) 3946 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3947 #endif 3948 if (im[i] >= rstart && im[i] < rend) { 3949 row = im[i] - rstart; 3950 for (j=0; j<n; j++) { 3951 /* If NumCol = 1 then a copy is not required */ 3952 if ((roworiented) && (n == 1)) { 3953 barray = (MatScalar*)v + i*bs2; 3954 } else if((!roworiented) && (m == 1)) { 3955 barray = (MatScalar*)v + j*bs2; 3956 } else { /* Here a copy is required */ 3957 if (roworiented) { 3958 value = v + i*(stepval+bs)*bs + j*bs; 3959 } else { 3960 value = v + j*(stepval+bs)*bs + i*bs; 3961 } 3962 for (ii=0; ii<bs; ii++,value+=stepval) { 3963 for (jj=0; jj<bs; jj++) { 3964 *barray++ = *value++; 3965 } 3966 } 3967 barray -=bs2; 3968 } 3969 3970 if (in[j] >= cstart && in[j] < cend){ 3971 col = in[j] - cstart; 3972 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3973 } 3974 else if (in[j] < 0) continue; 3975 #if defined(PETSC_USE_DEBUG) 3976 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3977 #endif 3978 else { 3979 if (mat->was_assembled) { 3980 if (!baij->colmap) { 3981 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3982 } 3983 3984 #if defined(PETSC_USE_DEBUG) 3985 #if defined (PETSC_USE_CTABLE) 3986 { PetscInt data; 3987 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3988 if ((data - 1) % bs) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3989 } 3990 #else 3991 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3992 #endif 3993 #endif 3994 #if defined (PETSC_USE_CTABLE) 3995 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3996 col = (col - 1)/bs; 3997 #else 3998 col = (baij->colmap[in[j]] - 1)/bs; 3999 #endif 4000 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4001 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4002 col = in[j]; 4003 } 4004 } 4005 else col = in[j]; 4006 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4007 } 4008 } 4009 } else { 4010 if (!baij->donotstash) { 4011 if (roworiented) { 4012 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4013 } else { 4014 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4015 } 4016 } 4017 } 4018 } 4019 4020 /* task normally handled by MatSetValuesBlocked() */ 4021 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4022 PetscFunctionReturn(0); 4023 } 4024 4025 #undef __FUNCT__ 4026 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4027 /*@ 4028 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows 4029 in standard CSR format. 4030 4031 Collective on MPI_Comm 4032 4033 Input Parameters: 4034 + comm - MPI communicator 4035 . bs - the block size, only a block size of 1 is supported 4036 . m - number of local rows (Cannot be PETSC_DECIDE) 4037 . n - number of local columns; this value should be the same as the local size used in creating the 4038 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 4039 calculated if N is given). For square matrices n is almost always m. 4040 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 4041 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 4042 . i - the indices into j for the start of each local row (starts with zero) 4043 . j - the column indices for each local row (must be sorted within each row) 4044 - a - matrix values 4045 4046 Output Parameter: 4047 . mat - the matrix 4048 4049 Level: intermediate 4050 4051 Notes: 4052 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4053 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4054 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4055 4056 The i and j indices are 0 based, and the i entries are offsets into the local j (and a) arrays.
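
   Example:
   A minimal sketch for bs = 1 in which each process supplies two local rows; comm stands for the
   caller's communicator, and the sizes, indices, and values below are purely illustrative:
.vb
   PetscInt    i[] = {0,2,4};           /* offsets into j[] and a[] for the two local rows */
   PetscInt    j[] = {0,1,1,2};         /* global column indices, sorted within each row */
   PetscScalar a[] = {1.0,2.0,3.0,4.0}; /* values corresponding to j[] */
   Mat         A;
   MatCreateMPIBAIJWithArrays(comm,1,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve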
4057 4058 .keywords: matrix, aij, compressed row, sparse, parallel 4059 4060 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4061 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4062 @*/ 4063 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4064 { 4065 PetscErrorCode ierr; 4066 4067 4068 PetscFunctionBegin; 4069 if (i[0]) { 4070 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4071 } 4072 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4073 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4074 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4075 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4076 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4077 PetscFunctionReturn(0); 4078 } 4079