#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
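  /* Insert an m x n array of bs x bs blocks.  Each block destined for this process is copied
     (when necessary) into the contiguous work array baij->barray and forwarded to the diagonal (A)
     or off-diagonal (B) sequential matrix; blocks owned by other processes are stashed for assembly.

     Caller-level sketch (illustrative only): inserting one 2x2 block at block row 3, block column 7
     of a bs=2 matrix, with row-oriented values:

        PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
        PetscInt    im = 3, in = 7;
        ierr = MatSetValuesBlocked(mat,1,&im,1,&in,vals,INSERT_VALUES);CHKERRQ(ierr);
  */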
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
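    /* In the blocked hash-table variant im[]/in[] are already block indices, so the key is
       row*Nbs + col + 1 directly; no division by bs is needed as in the point-wise routine above. */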
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 662 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 663 v = amat->a; jj = amat->j; 664 for (i=0; i<amat->nz; i++) { 665 for (j=0; j<bs; j++){ 666 col = bs*(cstart + *jj) + j; /* column index */ 667 for (row=0; row<bs; row++){ 668 tmp[col] += PetscAbsScalar(*v); v++; 669 } 670 } 671 jj++; 672 } 673 v = bmat->a; jj = bmat->j; 674 for (i=0; i<bmat->nz; i++) { 675 for (j=0; j<bs; j++){ 676 col = bs*garray[*jj] + j; 677 for (row=0; row<bs; row++){ 678 tmp[col] += PetscAbsScalar(*v); v++; 679 } 680 } 681 jj++; 682 } 683 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 684 *nrm = 0.0; 685 for (j=0; j<mat->cmap->N; j++) { 686 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 687 } 688 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY) { /* max row sum */ 690 PetscReal *sums; 691 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 692 sum = 0.0; 693 for (j=0; j<amat->mbs; j++) { 694 for (row=0; row<bs; row++) sums[row] = 0.0; 695 v = amat->a + bs2*amat->i[j]; 696 nz = amat->i[j+1]-amat->i[j]; 697 for (i=0; i<nz; i++) { 698 for (col=0; col<bs; col++){ 699 for (row=0; row<bs; row++){ 700 sums[row] += PetscAbsScalar(*v); v++; 701 } 702 } 703 } 704 v = bmat->a + bs2*bmat->i[j]; 705 nz = bmat->i[j+1]-bmat->i[j]; 706 for (i=0; i<nz; i++) { 707 for (col=0; col<bs; 
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
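      /* print a per-process summary of the local, on-diagonal and off-diagonal storage */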
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

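  /* y = A^T x: the local diagonal block transpose is applied directly, while the contribution
     of the off-diagonal block (B^T x) is accumulated into a->lvec and scattered back to its
     owning processes with a reverse, additive scatter. */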
  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
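  /* each local block row of A is written into B with the row and column indices exchanged:
     the bs point rows listed in rvals become columns of B at the transposed location */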
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
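  /* rebuild the prefix sums (they were advanced while packing svalues above) so that
     starts[i] again gives the first entry destined for process i */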
nprocs[2*i-2];} 1607 count = 0; 1608 for (i=0; i<size; i++) { 1609 if (nprocs[2*i+1]) { 1610 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1611 } 1612 } 1613 ierr = PetscFree(starts);CHKERRQ(ierr); 1614 1615 base = owners[rank]; 1616 1617 /* wait on receives */ 1618 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1619 count = nrecvs; 1620 slen = 0; 1621 while (count) { 1622 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1623 /* unpack receives into our local space */ 1624 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1625 source[imdex] = recv_status.MPI_SOURCE; 1626 lens[imdex] = n; 1627 slen += n; 1628 count--; 1629 } 1630 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1631 1632 /* move the data into the send scatter */ 1633 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1634 count = 0; 1635 for (i=0; i<nrecvs; i++) { 1636 values = rvalues + i*nmax; 1637 for (j=0; j<lens[i]; j++) { 1638 lrows[count++] = values[j] - base; 1639 } 1640 } 1641 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1642 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1643 ierr = PetscFree(owner);CHKERRQ(ierr); 1644 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1645 1646 /* actually zap the local rows */ 1647 /* 1648 Zero the required rows. If the "diagonal block" of the matrix 1649 is square and the user wishes to set the diagonal we use separate 1650 code so that MatSetValues() is not called for each diagonal allocating 1651 new memory, thus calling lots of mallocs and slowing things down. 1652 1653 */ 1654 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1655 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1656 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1657 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1658 } else if (diag != 0.0) { 1659 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1660 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1661 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1662 for (i=0; i<slen; i++) { 1663 row = lrows[i] + rstart_bs; 1664 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1665 } 1666 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1667 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1668 } else { 1669 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1670 } 1671 1672 ierr = PetscFree(lrows);CHKERRQ(ierr); 1673 1674 /* wait on sends */ 1675 if (nsends) { 1676 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1677 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1678 ierr = PetscFree(send_status);CHKERRQ(ierr); 1679 } 1680 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1681 ierr = PetscFree(svalues);CHKERRQ(ierr); 1682 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1688 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1689 { 1690 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 1693 PetscFunctionBegin; 1694 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ 
"MatEqual_MPIBAIJ" 1702 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1703 { 1704 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1705 Mat a,b,c,d; 1706 PetscTruth flg; 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 a = matA->A; b = matA->B; 1711 c = matB->A; d = matB->B; 1712 1713 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1714 if (flg) { 1715 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1716 } 1717 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatCopy_MPIBAIJ" 1723 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1724 { 1725 PetscErrorCode ierr; 1726 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1727 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1728 1729 PetscFunctionBegin; 1730 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1731 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1732 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1733 } else { 1734 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1735 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1736 } 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1742 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1743 { 1744 PetscErrorCode ierr; 1745 1746 PetscFunctionBegin; 1747 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1748 PetscFunctionReturn(0); 1749 } 1750 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1753 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1754 { 1755 PetscErrorCode ierr; 1756 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1757 PetscBLASInt bnz,one=1; 1758 Mat_SeqBAIJ *x,*y; 1759 1760 PetscFunctionBegin; 1761 if (str == SAME_NONZERO_PATTERN) { 1762 PetscScalar alpha = a; 1763 x = (Mat_SeqBAIJ *)xx->A->data; 1764 y = (Mat_SeqBAIJ *)yy->A->data; 1765 bnz = PetscBLASIntCast(x->nz); 1766 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1767 x = (Mat_SeqBAIJ *)xx->B->data; 1768 y = (Mat_SeqBAIJ *)yy->B->data; 1769 bnz = PetscBLASIntCast(x->nz); 1770 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1771 } else { 1772 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1773 } 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1779 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1780 { 1781 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1782 PetscInt rbs,cbs; 1783 PetscErrorCode ierr; 1784 1785 PetscFunctionBegin; 1786 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1787 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1788 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1789 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1790 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1791 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1797 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1798 { 1799 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1800 PetscErrorCode ierr; 1801 1802 PetscFunctionBegin; 1803 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1804 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1805 
PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1810 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1811 { 1812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1813 PetscErrorCode ierr; 1814 1815 PetscFunctionBegin; 1816 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1817 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1823 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1824 { 1825 PetscErrorCode ierr; 1826 IS iscol_local; 1827 PetscInt csize; 1828 1829 PetscFunctionBegin; 1830 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1831 if (call == MAT_REUSE_MATRIX) { 1832 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1833 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1834 } else { 1835 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1836 } 1837 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1838 if (call == MAT_INITIAL_MATRIX) { 1839 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1840 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1841 } 1842 PetscFunctionReturn(0); 1843 } 1844 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1847 /* 1848 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1849 in local and then by concatenating the local matrices the end result. 1850 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1851 */ 1852 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1853 { 1854 PetscErrorCode ierr; 1855 PetscMPIInt rank,size; 1856 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1857 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1858 Mat *local,M,Mreuse; 1859 MatScalar *vwork,*aa; 1860 MPI_Comm comm = ((PetscObject)mat)->comm; 1861 Mat_SeqBAIJ *aij; 1862 1863 1864 PetscFunctionBegin; 1865 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1866 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1867 1868 if (call == MAT_REUSE_MATRIX) { 1869 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1870 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1871 local = &Mreuse; 1872 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1873 } else { 1874 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1875 Mreuse = *local; 1876 ierr = PetscFree(local);CHKERRQ(ierr); 1877 } 1878 1879 /* 1880 m - number of local rows 1881 n - number of columns (same on all processors) 1882 rstart - first row in new global matrix generated 1883 */ 1884 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1885 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1886 m = m/bs; 1887 n = n/bs; 1888 1889 if (call == MAT_INITIAL_MATRIX) { 1890 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1891 ii = aij->i; 1892 jj = aij->j; 1893 1894 /* 1895 Determine the number of non-zeros in the diagonal and off-diagonal 1896 portions of the matrix in order to do correct preallocation 1897 */ 1898 1899 /* first get start and end of "diagonal" columns */ 1900 if (csize == PETSC_DECIDE) { 1901 ierr = 
ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1902 if (mglobal == n*bs) { /* square matrix */ 1903 nlocal = m; 1904 } else { 1905 nlocal = n/size + ((n % size) > rank); 1906 } 1907 } else { 1908 nlocal = csize/bs; 1909 } 1910 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1911 rstart = rend - nlocal; 1912 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1913 1914 /* next, compute all the lengths */ 1915 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1916 olens = dlens + m; 1917 for (i=0; i<m; i++) { 1918 jend = ii[i+1] - ii[i]; 1919 olen = 0; 1920 dlen = 0; 1921 for (j=0; j<jend; j++) { 1922 if (*jj < rstart || *jj >= rend) olen++; 1923 else dlen++; 1924 jj++; 1925 } 1926 olens[i] = olen; 1927 dlens[i] = dlen; 1928 } 1929 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1930 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1931 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1932 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1933 ierr = PetscFree(dlens);CHKERRQ(ierr); 1934 } else { 1935 PetscInt ml,nl; 1936 1937 M = *newmat; 1938 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1939 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1940 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1941 /* 1942 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1943 rather than the slower MatSetValues(). 1944 */ 1945 M->was_assembled = PETSC_TRUE; 1946 M->assembled = PETSC_FALSE; 1947 } 1948 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1949 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1950 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1951 ii = aij->i; 1952 jj = aij->j; 1953 aa = aij->a; 1954 for (i=0; i<m; i++) { 1955 row = rstart/bs + i; 1956 nz = ii[i+1] - ii[i]; 1957 cwork = jj; jj += nz; 1958 vwork = aa; aa += nz; 1959 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1960 } 1961 1962 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1963 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 *newmat = M; 1965 1966 /* save submatrix used in processor for next request */ 1967 if (call == MAT_INITIAL_MATRIX) { 1968 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1969 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1970 } 1971 1972 PetscFunctionReturn(0); 1973 } 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatPermute_MPIBAIJ" 1977 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1978 { 1979 MPI_Comm comm,pcomm; 1980 PetscInt first,local_size,nrows; 1981 const PetscInt *rows; 1982 PetscMPIInt size; 1983 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1984 PetscErrorCode ierr; 1985 1986 PetscFunctionBegin; 1987 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1988 /* make a collective version of 'rowp' */ 1989 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1990 if (pcomm==comm) { 1991 crowp = rowp; 1992 } else { 1993 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1994 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1995 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1996 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1997 } 1998 /* collect the global row permutation and invert it */ 1999 ierr = 
ISAllGather(crowp,&growp);CHKERRQ(ierr); 2000 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2001 if (pcomm!=comm) { 2002 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2003 } 2004 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2005 /* get the local target indices */ 2006 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2007 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2008 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2009 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2010 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2011 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2012 /* the column permutation is so much easier; 2013 make a local version of 'colp' and invert it */ 2014 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2015 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2016 if (size==1) { 2017 lcolp = colp; 2018 } else { 2019 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2020 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2021 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2022 } 2023 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2024 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2025 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2026 if (size>1) { 2027 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2028 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2029 } 2030 /* now we just get the submatrix */ 2031 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2032 /* clean up */ 2033 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2034 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2040 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2041 { 2042 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2043 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2044 2045 PetscFunctionBegin; 2046 if (nghosts) { *nghosts = B->nbs;} 2047 if (ghosts) {*ghosts = baij->garray;} 2048 PetscFunctionReturn(0); 2049 } 2050 2051 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2052 2053 #undef __FUNCT__ 2054 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2055 /* 2056 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2057 */ 2058 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2059 { 2060 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2061 PetscErrorCode ierr; 2062 PetscMPIInt size,*ncolsonproc,*disp,nn; 2063 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2064 const PetscInt *is; 2065 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2066 PetscInt *rowhit,M,cstart,cend,colb; 2067 PetscInt *columnsforrow,l; 2068 IS *isa; 2069 PetscTruth done,flg; 2070 ISLocalToGlobalMapping map = mat->bmapping; 2071 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2072 2073 PetscFunctionBegin; 2074 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2075 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2076 2077 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2078 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2079 M = mat->rmap->n/bs; 2080 cstart = mat->cmap->rstart/bs; 2081 cend = mat->cmap->rend/bs; 2082 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2083 c->N = mat->cmap->N/bs; 2084 c->m = mat->rmap->n/bs; 2085 c->rstart = mat->rmap->rstart/bs; 2086 2087 c->ncolors = nis; 2088 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2089 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2090 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2091 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2092 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2093 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2094 2095 /* Allow access to data structures of local part of matrix */ 2096 if (!baij->colmap) { 2097 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2098 } 2099 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2100 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2101 2102 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2103 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2104 2105 for (i=0; i<nis; i++) { 2106 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2107 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2108 c->ncolumns[i] = n; 2109 if (n) { 2110 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2111 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2112 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2113 } else { 2114 c->columns[i] = 0; 2115 } 2116 2117 if (ctype == IS_COLORING_GLOBAL){ 2118 /* Determine the total (parallel) number of columns of this color */ 2119 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2120 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2121 2122 nn = PetscMPIIntCast(n); 2123 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2124 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2125 if (!nctot) { 2126 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2127 } 2128 2129 disp[0] = 0; 2130 for (j=1; j<size; j++) { 2131 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2132 } 2133 2134 /* Get complete list of columns for color on each processor */ 2135 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2136 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2137 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2138 } else if (ctype == IS_COLORING_GHOSTED){ 2139 /* Determine local number of columns of this color on this process, including ghost points */ 2140 nctot = n; 2141 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2142 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2143 } else { 2144 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2145 } 2146 2147 /* 2148 Mark all rows affect by these columns 2149 */ 2150 /* Temporary option to allow for debugging/testing */ 2151 flg = PETSC_FALSE; 2152 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2153 if (!flg) {/*-----------------------------------------------------------------------------*/ 2154 /* crude, fast version */ 2155 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2156 /* loop over columns*/ 2157 for (j=0; j<nctot; j++) { 2158 if (ctype == IS_COLORING_GHOSTED) { 2159 col = ltog[cols[j]]; 2160 } else { 2161 col = cols[j]; 2162 } 2163 if (col >= cstart && col < cend) { 2164 /* column is in diagonal block of matrix */ 2165 rows = A_cj + A_ci[col-cstart]; 2166 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2167 } else { 2168 #if defined (PETSC_USE_CTABLE) 2169 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2170 colb --; 2171 #else 2172 colb = baij->colmap[col] - 1; 2173 #endif 2174 if (colb == -1) { 2175 m = 0; 2176 } else { 2177 colb = colb/bs; 2178 rows = B_cj + B_ci[colb]; 2179 m = B_ci[colb+1] - B_ci[colb]; 2180 } 2181 } 2182 /* loop over columns marking them in rowhit */ 2183 for (k=0; k<m; k++) { 2184 rowhit[*rows++] = col + 1; 2185 } 2186 } 2187 2188 /* count the number of hits */ 2189 nrows = 0; 2190 for (j=0; j<M; j++) { 2191 if (rowhit[j]) nrows++; 2192 } 2193 c->nrows[i] = nrows; 2194 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2195 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2196 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2197 nrows = 0; 2198 for (j=0; j<M; j++) { 2199 if (rowhit[j]) { 2200 c->rows[i][nrows] = j; 2201 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2202 nrows++; 2203 } 2204 } 2205 } else {/*-------------------------------------------------------------------------------*/ 2206 /* slow version, using rowhit as a linked list */ 2207 PetscInt currentcol,fm,mfm; 2208 rowhit[M] = M; 2209 nrows = 0; 2210 /* loop over columns*/ 2211 for (j=0; j<nctot; j++) { 2212 if (ctype == IS_COLORING_GHOSTED) { 2213 col = ltog[cols[j]]; 2214 } else { 2215 col = cols[j]; 2216 } 2217 if (col >= cstart && col < cend) { 2218 /* column is in diagonal block of matrix */ 2219 rows = A_cj + A_ci[col-cstart]; 2220 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2221 } else { 2222 #if defined (PETSC_USE_CTABLE) 2223 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2224 colb --; 2225 #else 2226 colb = baij->colmap[col] - 1; 2227 #endif 2228 if (colb == -1) { 2229 m = 0; 2230 } else { 2231 colb = colb/bs; 2232 rows = B_cj + B_ci[colb]; 2233 m = B_ci[colb+1] - B_ci[colb]; 2234 } 2235 } 2236 2237 /* loop over columns marking them in rowhit */ 2238 fm = M; /* fm points to first entry in linked list */ 2239 for (k=0; k<m; k++) { 2240 currentcol = *rows++; 2241 /* is it already in the list? 
*/ 2242 do { 2243 mfm = fm; 2244 fm = rowhit[fm]; 2245 } while (fm < currentcol); 2246 /* not in list so add it */ 2247 if (fm != currentcol) { 2248 nrows++; 2249 columnsforrow[currentcol] = col; 2250 /* next three lines insert new entry into linked list */ 2251 rowhit[mfm] = currentcol; 2252 rowhit[currentcol] = fm; 2253 fm = currentcol; 2254 /* fm points to present position in list since we know the columns are sorted */ 2255 } else { 2256 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2257 } 2258 } 2259 } 2260 c->nrows[i] = nrows; 2261 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2262 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2263 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2264 /* now store the linked list of rows into c->rows[i] */ 2265 nrows = 0; 2266 fm = rowhit[M]; 2267 do { 2268 c->rows[i][nrows] = fm; 2269 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2270 fm = rowhit[fm]; 2271 } while (fm < M); 2272 } /* ---------------------------------------------------------------------------------------*/ 2273 ierr = PetscFree(cols);CHKERRQ(ierr); 2274 } 2275 2276 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2277 /* 2278 vscale will contain the "diagonal" on processor scalings followed by the off processor 2279 */ 2280 if (ctype == IS_COLORING_GLOBAL) { 2281 PetscInt *garray; 2282 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2283 for (i=0; i<baij->B->cmap->n/bs; i++) { 2284 for (j=0; j<bs; j++) { 2285 garray[i*bs+j] = bs*baij->garray[i]+j; 2286 } 2287 } 2288 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2289 ierr = PetscFree(garray);CHKERRQ(ierr); 2290 CHKMEMQ; 2291 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2292 for (k=0; k<c->ncolors; k++) { 2293 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2294 for (l=0; l<c->nrows[k]; l++) { 2295 col = c->columnsforrow[k][l]; 2296 if (col >= cstart && col < cend) { 2297 /* column is in diagonal block of matrix */ 2298 colb = col - cstart; 2299 } else { 2300 /* column is in "off-processor" part */ 2301 #if defined (PETSC_USE_CTABLE) 2302 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2303 colb --; 2304 #else 2305 colb = baij->colmap[col] - 1; 2306 #endif 2307 colb = colb/bs; 2308 colb += cend - cstart; 2309 } 2310 c->vscaleforrow[k][l] = colb; 2311 } 2312 } 2313 } else if (ctype == IS_COLORING_GHOSTED) { 2314 /* Get gtol mapping */ 2315 PetscInt N = mat->cmap->N, *gtol; 2316 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2317 for (i=0; i<N; i++) gtol[i] = -1; 2318 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2319 2320 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2321 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2322 for (k=0; k<c->ncolors; k++) { 2323 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2324 for (l=0; l<c->nrows[k]; l++) { 2325 col = c->columnsforrow[k][l]; /* global column index */ 2326 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2327 } 2328 } 2329 ierr = PetscFree(gtol);CHKERRQ(ierr); 2330 } 2331 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2332 2333 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2334 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2335 
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2336 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2337 CHKMEMQ; 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2343 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2344 { 2345 Mat B; 2346 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2347 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2348 Mat_SeqAIJ *b; 2349 PetscErrorCode ierr; 2350 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2351 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2352 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2353 2354 PetscFunctionBegin; 2355 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2356 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2357 2358 /* ---------------------------------------------------------------- 2359 Tell every processor the number of nonzeros per row 2360 */ 2361 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2362 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2363 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2364 } 2365 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2366 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2367 displs = recvcounts + size; 2368 for (i=0; i<size; i++) { 2369 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2370 displs[i] = A->rmap->range[i]/bs; 2371 } 2372 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2373 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2374 #else 2375 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2376 #endif 2377 /* --------------------------------------------------------------- 2378 Create the sequential matrix of the same type as the local block diagonal 2379 */ 2380 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2381 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2382 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2383 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2384 b = (Mat_SeqAIJ *)B->data; 2385 2386 /*-------------------------------------------------------------------- 2387 Copy my part of matrix column indices over 2388 */ 2389 sendcount = ad->nz + bd->nz; 2390 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2391 a_jsendbuf = ad->j; 2392 b_jsendbuf = bd->j; 2393 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2394 cnt = 0; 2395 for (i=0; i<n; i++) { 2396 2397 /* put in lower diagonal portion */ 2398 m = bd->i[i+1] - bd->i[i]; 2399 while (m > 0) { 2400 /* is it above diagonal (in bd (compressed) numbering) */ 2401 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2402 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2403 m--; 2404 } 2405 2406 /* put in diagonal portion */ 2407 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2408 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2409 } 2410 2411 /* put in upper diagonal portion */ 2412 while (m-- > 0) { 2413 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2414 } 2415 } 2416 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2417 2418 /*-------------------------------------------------------------------- 2419 Gather all column indices to all processors 2420 */ 2421 for (i=0; i<size; i++) { 2422 recvcounts[i] = 0; 2423 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2424 recvcounts[i] += lens[j]; 2425 } 2426 } 2427 displs[0] = 0; 2428 for (i=1; i<size; i++) { 2429 displs[i] = displs[i-1] + recvcounts[i-1]; 2430 } 2431 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2432 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2433 #else 2434 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2435 #endif 2436 /*-------------------------------------------------------------------- 2437 Assemble the matrix into useable form (note numerical values not yet set) 2438 */ 2439 /* set the b->ilen (length of each row) values */ 2440 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2441 /* set the b->i indices */ 2442 b->i[0] = 0; 2443 for (i=1; i<=A->rmap->N/bs; i++) { 2444 b->i[i] = b->i[i-1] + lens[i-1]; 2445 } 2446 ierr = PetscFree(lens);CHKERRQ(ierr); 2447 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2450 2451 if (A->symmetric){ 2452 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2453 } else if (A->hermitian) { 2454 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2455 } else if (A->structurally_symmetric) { 2456 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2457 } 2458 *newmat = B; 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatSOR_MPIBAIJ" 2464 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2465 { 2466 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2467 PetscErrorCode ierr; 2468 Vec bb1 = 0; 2469 2470 PetscFunctionBegin; 2471 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2472 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2473 } 2474 2475 if (flag == SOR_APPLY_UPPER) { 2476 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2481 if (flag & SOR_ZERO_INITIAL_GUESS) { 2482 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2483 its--; 2484 } 2485 2486 while (its--) { 2487 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2488 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2489 2490 /* update rhs: bb1 = bb - B*x */ 2491 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2492 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2493 2494 /* local sweep */ 2495 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2496 } 2497 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2498 if (flag & SOR_ZERO_INITIAL_GUESS) { 2499 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2500 its--; 2501 } 2502 while (its--) { 2503 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2504 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2505 2506 /* update rhs: bb1 = bb - B*x */ 2507 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2508 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2509 2510 /* local sweep */ 2511 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2512 } 2513 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2514 if (flag & SOR_ZERO_INITIAL_GUESS) { 2515 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2516 its--; 2517 } 2518 while (its--) { 2519 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2520 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2521 2522 /* update rhs: bb1 = bb - B*x */ 2523 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2524 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2525 2526 /* local sweep */ 2527 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2528 } 2529 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2530 2531 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2532 PetscFunctionReturn(0); 2533 } 2534 2535 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2536 2537 2538 /* -------------------------------------------------------------------*/ 2539 static struct _MatOps MatOps_Values = { 2540 MatSetValues_MPIBAIJ, 2541 MatGetRow_MPIBAIJ, 2542 MatRestoreRow_MPIBAIJ, 2543 MatMult_MPIBAIJ, 2544 /* 4*/ MatMultAdd_MPIBAIJ, 2545 MatMultTranspose_MPIBAIJ, 2546 MatMultTransposeAdd_MPIBAIJ, 2547 0, 2548 0, 2549 0, 2550 /*10*/ 0, 2551 0, 2552 0, 2553 MatSOR_MPIBAIJ, 2554 MatTranspose_MPIBAIJ, 2555 /*15*/ MatGetInfo_MPIBAIJ, 2556 MatEqual_MPIBAIJ, 2557 MatGetDiagonal_MPIBAIJ, 2558 MatDiagonalScale_MPIBAIJ, 2559 MatNorm_MPIBAIJ, 2560 /*20*/ MatAssemblyBegin_MPIBAIJ, 2561 MatAssemblyEnd_MPIBAIJ, 2562 MatSetOption_MPIBAIJ, 2563 MatZeroEntries_MPIBAIJ, 2564 /*24*/ MatZeroRows_MPIBAIJ, 2565 0, 2566 0, 2567 0, 2568 0, 2569 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2570 0, 2571 0, 2572 0, 2573 0, 2574 /*34*/ MatDuplicate_MPIBAIJ, 2575 0, 2576 0, 2577 0, 2578 0, 2579 /*39*/ MatAXPY_MPIBAIJ, 2580 MatGetSubMatrices_MPIBAIJ, 2581 MatIncreaseOverlap_MPIBAIJ, 2582 MatGetValues_MPIBAIJ, 2583 MatCopy_MPIBAIJ, 2584 /*44*/ 0, 2585 MatScale_MPIBAIJ, 2586 0, 2587 0, 2588 0, 2589 /*49*/ MatSetBlockSize_MPIBAIJ, 2590 0, 2591 0, 2592 0, 2593 0, 2594 /*54*/ MatFDColoringCreate_MPIBAIJ, 2595 0, 2596 MatSetUnfactored_MPIBAIJ, 2597 MatPermute_MPIBAIJ, 2598 MatSetValuesBlocked_MPIBAIJ, 2599 /*59*/ MatGetSubMatrix_MPIBAIJ, 2600 MatDestroy_MPIBAIJ, 2601 MatView_MPIBAIJ, 2602 0, 2603 0, 2604 /*64*/ 0, 2605 0, 2606 0, 2607 0, 2608 0, 2609 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*74*/ 0, 2615 MatFDColoringApply_BAIJ, 2616 0, 2617 0, 2618 0, 2619 /*79*/ 0, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*84*/ 0, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*89*/ 0, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*94*/ 0, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*99*/ 0, 2640 0, 2641 0, 2642 0, 2643 0, 2644 /*104*/0, 2645 MatRealPart_MPIBAIJ, 2646 MatImaginaryPart_MPIBAIJ, 2647 0, 2648 0, 2649 /*109*/0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2655 0, 2656 MatGetGhosts_MPIBAIJ, 2657 0, 2658 0, 2659 /*119*/0, 2660 0, 2661 0, 2662 0, 2663 
MatLoadnew_MPIBAIJ 2664 }; 2665 2666 EXTERN_C_BEGIN 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2669 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2670 { 2671 PetscFunctionBegin; 2672 *a = ((Mat_MPIBAIJ *)A->data)->A; 2673 *iscopy = PETSC_FALSE; 2674 PetscFunctionReturn(0); 2675 } 2676 EXTERN_C_END 2677 2678 EXTERN_C_BEGIN 2679 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2680 EXTERN_C_END 2681 2682 EXTERN_C_BEGIN 2683 #undef __FUNCT__ 2684 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2685 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2686 { 2687 PetscInt m,rstart,cstart,cend; 2688 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2689 const PetscInt *JJ=0; 2690 PetscScalar *values=0; 2691 PetscErrorCode ierr; 2692 2693 PetscFunctionBegin; 2694 2695 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2696 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2697 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2698 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2699 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2700 m = B->rmap->n/bs; 2701 rstart = B->rmap->rstart/bs; 2702 cstart = B->cmap->rstart/bs; 2703 cend = B->cmap->rend/bs; 2704 2705 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2706 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2707 for (i=0; i<m; i++) { 2708 nz = ii[i+1] - ii[i]; 2709 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2710 nz_max = PetscMax(nz_max,nz); 2711 JJ = jj + ii[i]; 2712 for (j=0; j<nz; j++) { 2713 if (*JJ >= cstart) break; 2714 JJ++; 2715 } 2716 d = 0; 2717 for (; j<nz; j++) { 2718 if (*JJ++ >= cend) break; 2719 d++; 2720 } 2721 d_nnz[i] = d; 2722 o_nnz[i] = nz - d; 2723 } 2724 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2725 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2726 2727 values = (PetscScalar*)V; 2728 if (!values) { 2729 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2730 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2731 } 2732 for (i=0; i<m; i++) { 2733 PetscInt row = i + rstart; 2734 PetscInt ncols = ii[i+1] - ii[i]; 2735 const PetscInt *icols = jj + ii[i]; 2736 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2737 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2738 } 2739 2740 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2741 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2742 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2743 2744 PetscFunctionReturn(0); 2745 } 2746 EXTERN_C_END 2747 2748 #undef __FUNCT__ 2749 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2750 /*@C 2751 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ (BAIJ) format 2752 (block compressed row). 2753 2754 Collective on MPI_Comm 2755 2756 Input Parameters: 2757 + B - the matrix . bs - the block size 2758 . i - the indices into j for the start of each local row (starts with zero) 2759 .
j - the column indices for each local row (starts with zero) these must be sorted for each row 2760 - v - optional values in the matrix 2761 2762 Level: developer 2763 2764 .keywords: matrix, aij, compressed row, sparse, parallel 2765 2766 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2767 @*/ 2768 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2769 { 2770 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2771 2772 PetscFunctionBegin; 2773 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2774 if (f) { 2775 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2776 } 2777 PetscFunctionReturn(0); 2778 } 2779 2780 EXTERN_C_BEGIN 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2784 { 2785 Mat_MPIBAIJ *b; 2786 PetscErrorCode ierr; 2787 PetscInt i, newbs = PetscAbs(bs); 2788 2789 PetscFunctionBegin; 2790 if (bs < 0) { 2791 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2792 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2793 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2794 bs = PetscAbs(bs); 2795 } 2796 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2797 bs = newbs; 2798 2799 2800 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2801 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2802 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2803 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2804 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2805 2806 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2807 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2808 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2809 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2810 2811 if (d_nnz) { 2812 for (i=0; i<B->rmap->n/bs; i++) { 2813 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2814 } 2815 } 2816 if (o_nnz) { 2817 for (i=0; i<B->rmap->n/bs; i++) { 2818 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2819 } 2820 } 2821 2822 b = (Mat_MPIBAIJ*)B->data; 2823 b->bs2 = bs*bs; 2824 b->mbs = B->rmap->n/bs; 2825 b->nbs = B->cmap->n/bs; 2826 b->Mbs = B->rmap->N/bs; 2827 b->Nbs = B->cmap->N/bs; 2828 2829 for (i=0; i<=b->size; i++) { 2830 b->rangebs[i] = B->rmap->range[i]/bs; 2831 } 2832 b->rstartbs = B->rmap->rstart/bs; 2833 b->rendbs = B->rmap->rend/bs; 2834 b->cstartbs = B->cmap->rstart/bs; 2835 b->cendbs = B->cmap->rend/bs; 2836 2837 if (!B->preallocated) { 2838 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2839 ierr = 
MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2840 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2841 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2842 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2843 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2844 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2846 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2847 } 2848 2849 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2850 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2851 B->preallocated = PETSC_TRUE; 2852 PetscFunctionReturn(0); 2853 } 2854 EXTERN_C_END 2855 2856 EXTERN_C_BEGIN 2857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2858 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2859 EXTERN_C_END 2860 2861 2862 EXTERN_C_BEGIN 2863 #undef __FUNCT__ 2864 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2865 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2866 { 2867 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2868 PetscErrorCode ierr; 2869 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2870 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2871 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2872 2873 PetscFunctionBegin; 2874 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2875 ii[0] = 0; 2876 CHKMEMQ; 2877 for (i=0; i<M; i++) { 2878 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2879 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2880 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2881 /* remove one from count of matrix has diagonal */ 2882 for (j=id[i]; j<id[i+1]; j++) { 2883 if (jd[j] == i) {ii[i+1]--;break;} 2884 } 2885 CHKMEMQ; 2886 } 2887 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2888 cnt = 0; 2889 for (i=0; i<M; i++) { 2890 for (j=io[i]; j<io[i+1]; j++) { 2891 if (garray[jo[j]] > rstart) break; 2892 jj[cnt++] = garray[jo[j]]; 2893 CHKMEMQ; 2894 } 2895 for (k=id[i]; k<id[i+1]; k++) { 2896 if (jd[k] != i) { 2897 jj[cnt++] = rstart + jd[k]; 2898 CHKMEMQ; 2899 } 2900 } 2901 for (;j<io[i+1]; j++) { 2902 jj[cnt++] = garray[jo[j]]; 2903 CHKMEMQ; 2904 } 2905 } 2906 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2907 PetscFunctionReturn(0); 2908 } 2909 EXTERN_C_END 2910 2911 EXTERN_C_BEGIN 2912 #if defined(PETSC_HAVE_MUMPS) 2913 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2914 #endif 2915 EXTERN_C_END 2916 2917 /*MC 2918 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2919 2920 Options Database Keys: 2921 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2922 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2923 - -mat_use_hash_table <fact> 2924 2925 Level: beginner 2926 2927 .seealso: MatCreateMPIBAIJ 2928 M*/ 2929 2930 EXTERN_C_BEGIN 2931 #undef __FUNCT__ 2932 #define __FUNCT__ "MatCreate_MPIBAIJ" 2933 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2934 { 2935 Mat_MPIBAIJ *b; 2936 PetscErrorCode ierr; 2937 PetscTruth flg; 2938 2939 PetscFunctionBegin; 2940 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2941 B->data = (void*)b; 2942 2943 2944 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2945 B->mapping = 0; 2946 B->assembled = PETSC_FALSE; 2947 2948 B->insertmode = NOT_SET_VALUES; 2949 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2950 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2951 2952 /* build local table of row and column ownerships */ 2953 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2954 2955 /* build cache for off array entries formed */ 2956 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2957 b->donotstash = PETSC_FALSE; 2958 b->colmap = PETSC_NULL; 2959 b->garray = PETSC_NULL; 2960 b->roworiented = PETSC_TRUE; 2961 2962 /* stuff used in block assembly */ 2963 b->barray = 0; 2964 2965 /* stuff used for matrix vector multiply */ 2966 b->lvec = 0; 2967 b->Mvctx = 0; 2968 2969 /* stuff for MatGetRow() */ 2970 b->rowindices = 0; 2971 b->rowvalues = 0; 2972 b->getrowactive = PETSC_FALSE; 2973 2974 /* hash table stuff */ 2975 b->ht = 0; 2976 b->hd = 0; 2977 b->ht_size = 0; 2978 b->ht_flag = PETSC_FALSE; 2979 b->ht_fact = 0; 2980 b->ht_total_ct = 0; 2981 b->ht_insert_ct = 0; 2982 2983 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2984 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2985 if (flg) { 2986 PetscReal fact = 1.39; 2987 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2988 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2989 if (fact <= 1.0) fact = 1.39; 2990 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2991 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2992 } 2993 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2994 2995 #if defined(PETSC_HAVE_MUMPS) 2996 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 2997 #endif 2998 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 2999 "MatConvert_MPIBAIJ_MPIAdj", 3000 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3001 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3002 "MatStoreValues_MPIBAIJ", 3003 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3004 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3005 "MatRetrieveValues_MPIBAIJ", 3006 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3007 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3008 "MatGetDiagonalBlock_MPIBAIJ", 3009 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3010 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3011 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3012 
MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3013 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3014 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3015 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3016 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3017 "MatDiagonalScaleLocal_MPIBAIJ", 3018 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3019 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3020 "MatSetHashTableFactor_MPIBAIJ", 3021 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3022 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3023 PetscFunctionReturn(0); 3024 } 3025 EXTERN_C_END 3026 3027 /*MC 3028 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3029 3030 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3031 and MATMPIBAIJ otherwise. 3032 3033 Options Database Keys: 3034 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3035 3036 Level: beginner 3037 3038 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3039 M*/ 3040 3041 EXTERN_C_BEGIN 3042 #undef __FUNCT__ 3043 #define __FUNCT__ "MatCreate_BAIJ" 3044 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3045 { 3046 PetscErrorCode ierr; 3047 PetscMPIInt size; 3048 3049 PetscFunctionBegin; 3050 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3051 if (size == 1) { 3052 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3053 } else { 3054 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3055 } 3056 PetscFunctionReturn(0); 3057 } 3058 EXTERN_C_END 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3062 /*@C 3063 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3064 (block compressed row). For good matrix assembly performance 3065 the user should preallocate the matrix storage by setting the parameters 3066 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3067 performance can be increased by more than a factor of 50. 3068 3069 Collective on Mat 3070 3071 Input Parameters: 3072 + B - the matrix 3073 . bs - size of block 3074 . d_nz - number of block nonzeros per block row in diagonal portion of local 3075 submatrix (same for all local rows) 3076 . d_nnz - array containing the number of block nonzeros in the various block rows 3077 in the diagonal portion of the local submatrix (possibly different for each block 3078 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3079 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3080 submatrix (same for all local rows). 3081 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3082 off-diagonal portion of the local submatrix (possibly different for 3083 each block row) or PETSC_NULL. 3084 3085 If the *_nnz parameter is given then the *_nz parameter is ignored 3086 3087 Options Database Keys: 3088 + -mat_block_size - size of the blocks to use 3089 - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix 3090 3091 Notes: 3092 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3093 then it must be used on all processors that share the object for that argument.
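   For example, a typical creation sequence looks like the following sketch (the communicator comm, the
   sizes m, n, M, N, and the block-nonzero counts are placeholders, not recommendations; error checking
   is omitted for brevity):

.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIBAIJ);
      /* block size 2; expect roughly 5 diagonal and 2 off-diagonal block nonzeros per block row */
      MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
.ve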
3094 3095 Storage Information: 3096 For a square global matrix we define each processor's diagonal portion 3097 to be its local rows and the corresponding columns (a square submatrix); 3098 each processor's off-diagonal portion encompasses the remainder of the 3099 local matrix (a rectangular submatrix). 3100 3101 The user can specify preallocated storage for the diagonal part of 3102 the local submatrix with either d_nz or d_nnz (not both). Set 3103 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3104 memory allocation. Likewise, specify preallocated storage for the 3105 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3106 3107 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3108 the figure below we depict these three local rows and all columns (0-11). 3109 3110 .vb 3111 0 1 2 3 4 5 6 7 8 9 10 11 3112 ------------------- 3113 row 3 | o o o d d d o o o o o o 3114 row 4 | o o o d d d o o o o o o 3115 row 5 | o o o d d d o o o o o o 3116 ------------------- 3117 .ve 3118 3119 Thus, any entries in the d locations are stored in the d (diagonal) 3120 submatrix, and any entries in the o locations are stored in the 3121 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3122 stored simply in the MATSEQBAIJ format for compressed row storage. 3123 3124 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3125 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3126 In general, for PDE problems in which most nonzeros are near the diagonal, 3127 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3128 or you will get TERRIBLE performance; see the users' manual chapter on 3129 matrices. 3130 3131 You can call MatGetInfo() to get information on how effective the preallocation was; 3132 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3133 You can also run with the option -info and look for messages with the string 3134 malloc in them to see if additional memory allocation was needed. 3135 3136 Level: intermediate 3137 3138 .keywords: matrix, block, aij, compressed row, sparse, parallel 3139 3140 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3141 @*/ 3142 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3143 { 3144 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3145 3146 PetscFunctionBegin; 3147 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3148 if (f) { 3149 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3150 } 3151 PetscFunctionReturn(0); 3152 } 3153 3154 #undef __FUNCT__ 3155 #define __FUNCT__ "MatCreateMPIBAIJ" 3156 /*@C 3157 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3158 (block compressed row). For good matrix assembly performance 3159 the user should preallocate the matrix storage by setting the parameters 3160 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3161 performance can be increased by more than a factor of 50. 3162 3163 Collective on MPI_Comm 3164 3165 Input Parameters: 3166 + comm - MPI communicator 3167 . bs - size of block 3168 .
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3169            This value should be the same as the local size used in creating the
3170            y vector for the matrix-vector product y = Ax.
3171 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3172            This value should be the same as the local size used in creating the
3173            x vector for the matrix-vector product y = Ax.
3174 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3175 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3176 .  d_nz - number of nonzero blocks per block row in diagonal portion of local
3177            submatrix (same for all local rows)
3178 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3179            in the diagonal portion of the local submatrix (possibly different for each block
3180            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3181 .  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3182            submatrix (same for all local rows).
3183 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3184            off-diagonal portion of the local submatrix (possibly different for
3185            each block row) or PETSC_NULL.
3186 
3187    Output Parameter:
3188 .  A - the matrix
3189 
3190    Options Database Keys:
3191 +   -mat_block_size - size of the blocks to use
3192 -   -mat_use_hash_table <fact>
3193 
3194    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3195    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3196    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3197 
3198    Notes:
3199    If the *_nnz parameter is given then the *_nz parameter is ignored
3200 
3201    A nonzero block is any block that has 1 or more nonzeros in it
3202 
3203    The user MUST specify either the local or global matrix dimensions
3204    (possibly both).
3205 
3206    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3207    then it must be used on all processors that share the object for that argument.
3208 
3209    Storage Information:
3210    For a square global matrix we define each processor's diagonal portion
3211    to be its local rows and the corresponding columns (a square submatrix);
3212    each processor's off-diagonal portion encompasses the remainder of the
3213    local matrix (a rectangular submatrix).
3214 
3215    The user can specify preallocated storage for the diagonal part of
3216    the local submatrix with either d_nz or d_nnz (not both).  Set
3217    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3218    memory allocation.  Likewise, specify preallocated storage for the
3219    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3220 
3221    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3222    the figure below we depict these three local rows and all columns (0-11).
3223 
3224 .vb
3225            0 1 2 3 4 5 6 7 8 9 10 11
3226           -------------------
3227    row 3  |  o o o d d d o o o o o o
3228    row 4  |  o o o d d d o o o o o o
3229    row 5  |  o o o d d d o o o o o o
3230           -------------------
3231 .ve
3232 
3233    Thus, any entries in the d locations are stored in the d (diagonal)
3234    submatrix, and any entries in the o locations are stored in the
3235    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3236    stored simply in the MATSEQBAIJ format for compressed row storage.
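
   As a purely illustrative sketch (assuming bs=3 and that every location drawn above
   really holds a nonzero), the single block row owned by this process has one diagonal
   block (block column 1) and three off-diagonal blocks (block columns 0, 2 and 3), so it
   could be preallocated with
.vb
      PetscInt d_nnz[1] = {1}, o_nnz[1] = {3};
      ierr = MatCreateMPIBAIJ(comm,3,3,3,12,12,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve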
3237 3238 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3239 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3240 In general, for PDE problems in which most nonzeros are near the diagonal, 3241 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3242 or you will get TERRIBLE performance; see the users' manual chapter on 3243 matrices. 3244 3245 Level: intermediate 3246 3247 .keywords: matrix, block, aij, compressed row, sparse, parallel 3248 3249 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3250 @*/ 3251 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3252 { 3253 PetscErrorCode ierr; 3254 PetscMPIInt size; 3255 3256 PetscFunctionBegin; 3257 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3258 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3259 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3260 if (size > 1) { 3261 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3262 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3263 } else { 3264 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3265 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3266 } 3267 PetscFunctionReturn(0); 3268 } 3269 3270 #undef __FUNCT__ 3271 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3272 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3273 { 3274 Mat mat; 3275 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3276 PetscErrorCode ierr; 3277 PetscInt len=0; 3278 3279 PetscFunctionBegin; 3280 *newmat = 0; 3281 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3282 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3283 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3284 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3285 3286 mat->factortype = matin->factortype; 3287 mat->preallocated = PETSC_TRUE; 3288 mat->assembled = PETSC_TRUE; 3289 mat->insertmode = NOT_SET_VALUES; 3290 3291 a = (Mat_MPIBAIJ*)mat->data; 3292 mat->rmap->bs = matin->rmap->bs; 3293 a->bs2 = oldmat->bs2; 3294 a->mbs = oldmat->mbs; 3295 a->nbs = oldmat->nbs; 3296 a->Mbs = oldmat->Mbs; 3297 a->Nbs = oldmat->Nbs; 3298 3299 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3300 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3301 3302 a->size = oldmat->size; 3303 a->rank = oldmat->rank; 3304 a->donotstash = oldmat->donotstash; 3305 a->roworiented = oldmat->roworiented; 3306 a->rowindices = 0; 3307 a->rowvalues = 0; 3308 a->getrowactive = PETSC_FALSE; 3309 a->barray = 0; 3310 a->rstartbs = oldmat->rstartbs; 3311 a->rendbs = oldmat->rendbs; 3312 a->cstartbs = oldmat->cstartbs; 3313 a->cendbs = oldmat->cendbs; 3314 3315 /* hash table stuff */ 3316 a->ht = 0; 3317 a->hd = 0; 3318 a->ht_size = 0; 3319 a->ht_flag = oldmat->ht_flag; 3320 a->ht_fact = oldmat->ht_fact; 3321 a->ht_total_ct = 0; 3322 a->ht_insert_ct = 0; 3323 3324 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3325 if (oldmat->colmap) { 3326 #if defined (PETSC_USE_CTABLE) 3327 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3328 #else 3329 ierr = 
PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3330 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3331 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3332 #endif 3333 } else a->colmap = 0; 3334 3335 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3336 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3337 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3338 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3339 } else a->garray = 0; 3340 3341 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3342 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3343 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3344 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3345 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3346 3347 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3348 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3349 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3350 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3351 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3352 *newmat = mat; 3353 3354 PetscFunctionReturn(0); 3355 } 3356 3357 #undef __FUNCT__ 3358 #define __FUNCT__ "MatLoad_MPIBAIJ" 3359 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 3360 { 3361 Mat A; 3362 PetscErrorCode ierr; 3363 int fd; 3364 PetscInt i,nz,j,rstart,rend; 3365 PetscScalar *vals,*buf; 3366 MPI_Comm comm = ((PetscObject)viewer)->comm; 3367 MPI_Status status; 3368 PetscMPIInt rank,size,maxnz; 3369 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3370 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3371 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3372 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3373 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3374 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3375 3376 PetscFunctionBegin; 3377 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3378 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3379 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3380 3381 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3382 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3383 if (!rank) { 3384 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3385 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3386 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3387 } 3388 3389 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3390 M = header[1]; N = header[2]; 3391 3392 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3393 3394 /* 3395 This code adds extra rows to make sure the number of rows is 3396 divisible by the blocksize 3397 */ 3398 Mbs = M/bs; 3399 extra_rows = bs - M + bs*Mbs; 3400 if (extra_rows == bs) extra_rows = 0; 3401 else Mbs++; 3402 if (extra_rows && !rank) { 3403 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3404 } 3405 3406 /* determine ownership of all rows */ 
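  /*
     Descriptive note (values purely illustrative): the formula below gives each process
     floor(Mbs/size) block rows plus one extra block row for the first (Mbs % size) ranks.
     For example, Mbs = 10 block rows on size = 4 processes yields 3,3,2,2 block rows on
     ranks 0..3, and m = mbs*bs converts the block-row count back to point rows.
  */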
3407 mbs = Mbs/size + ((Mbs % size) > rank); 3408 m = mbs*bs; 3409 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3410 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3411 3412 /* process 0 needs enough room for process with most rows */ 3413 if (!rank) { 3414 mmax = rowners[1]; 3415 for (i=2; i<size; i++) { 3416 mmax = PetscMax(mmax,rowners[i]); 3417 } 3418 mmax*=bs; 3419 } else mmax = m; 3420 3421 rowners[0] = 0; 3422 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3423 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3424 rstart = rowners[rank]; 3425 rend = rowners[rank+1]; 3426 3427 /* distribute row lengths to all processors */ 3428 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3429 if (!rank) { 3430 mend = m; 3431 if (size == 1) mend = mend - extra_rows; 3432 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3433 for (j=mend; j<m; j++) locrowlens[j] = 1; 3434 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3435 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3436 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3437 for (j=0; j<m; j++) { 3438 procsnz[0] += locrowlens[j]; 3439 } 3440 for (i=1; i<size; i++) { 3441 mend = browners[i+1] - browners[i]; 3442 if (i == size-1) mend = mend - extra_rows; 3443 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3444 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3445 /* calculate the number of nonzeros on each processor */ 3446 for (j=0; j<browners[i+1]-browners[i]; j++) { 3447 procsnz[i] += rowlengths[j]; 3448 } 3449 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3450 } 3451 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3452 } else { 3453 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3454 } 3455 3456 if (!rank) { 3457 /* determine max buffer needed and allocate it */ 3458 maxnz = procsnz[0]; 3459 for (i=1; i<size; i++) { 3460 maxnz = PetscMax(maxnz,procsnz[i]); 3461 } 3462 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3463 3464 /* read in my part of the matrix column indices */ 3465 nz = procsnz[0]; 3466 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3467 mycols = ibuf; 3468 if (size == 1) nz -= extra_rows; 3469 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3470 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3471 3472 /* read in every ones (except the last) and ship off */ 3473 for (i=1; i<size-1; i++) { 3474 nz = procsnz[i]; 3475 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3476 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3477 } 3478 /* read in the stuff for the last proc */ 3479 if (size != 1) { 3480 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3481 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3482 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3483 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3484 } 3485 ierr = PetscFree(cols);CHKERRQ(ierr); 3486 } else { 3487 /* determine buffer space needed for message */ 3488 nz = 0; 3489 for (i=0; i<m; i++) { 3490 nz += locrowlens[i]; 3491 } 3492 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3493 mycols = ibuf; 3494 /* receive message of column indices*/ 3495 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 
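    /* Descriptive comment added for clarity: the MPI_Get_count() check below is a sanity
       test; the number of column indices received from process 0 must equal nz, the sum of
       this process's row lengths, otherwise the file and the computed row partition disagree. */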
3496 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3497 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3498 } 3499 3500 /* loop over local rows, determining number of off diagonal entries */ 3501 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3502 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3503 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3504 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3505 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3506 rowcount = 0; nzcount = 0; 3507 for (i=0; i<mbs; i++) { 3508 dcount = 0; 3509 odcount = 0; 3510 for (j=0; j<bs; j++) { 3511 kmax = locrowlens[rowcount]; 3512 for (k=0; k<kmax; k++) { 3513 tmp = mycols[nzcount++]/bs; 3514 if (!mask[tmp]) { 3515 mask[tmp] = 1; 3516 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3517 else masked1[dcount++] = tmp; 3518 } 3519 } 3520 rowcount++; 3521 } 3522 3523 dlens[i] = dcount; 3524 odlens[i] = odcount; 3525 3526 /* zero out the mask elements we set */ 3527 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3528 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3529 } 3530 3531 /* create our matrix */ 3532 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3533 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3534 ierr = MatSetType(A,type);CHKERRQ(ierr); 3535 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3536 3537 if (!rank) { 3538 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3539 /* read in my part of the matrix numerical values */ 3540 nz = procsnz[0]; 3541 vals = buf; 3542 mycols = ibuf; 3543 if (size == 1) nz -= extra_rows; 3544 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3545 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3546 3547 /* insert into matrix */ 3548 jj = rstart*bs; 3549 for (i=0; i<m; i++) { 3550 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3551 mycols += locrowlens[i]; 3552 vals += locrowlens[i]; 3553 jj++; 3554 } 3555 /* read in other processors (except the last one) and ship out */ 3556 for (i=1; i<size-1; i++) { 3557 nz = procsnz[i]; 3558 vals = buf; 3559 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3560 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3561 } 3562 /* the last proc */ 3563 if (size != 1){ 3564 nz = procsnz[i] - extra_rows; 3565 vals = buf; 3566 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3567 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3568 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3569 } 3570 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3571 } else { 3572 /* receive numeric values */ 3573 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3574 3575 /* receive message of values*/ 3576 vals = buf; 3577 mycols = ibuf; 3578 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3579 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3580 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3581 3582 /* insert into matrix */ 3583 jj = rstart*bs; 3584 for (i=0; i<m; i++) { 3585 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3586 mycols += 
locrowlens[i]; 3587 vals += locrowlens[i]; 3588 jj++; 3589 } 3590 } 3591 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3592 ierr = PetscFree(buf);CHKERRQ(ierr); 3593 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3594 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3595 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3596 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3597 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3598 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3599 3600 *newmat = A; 3601 PetscFunctionReturn(0); 3602 } 3603 3604 #undef __FUNCT__ 3605 #define __FUNCT__ "MatLoadnew_MPIBAIJ" 3606 PetscErrorCode MatLoadnew_MPIBAIJ(PetscViewer viewer, Mat newmat) 3607 { 3608 PetscErrorCode ierr; 3609 int fd; 3610 PetscInt i,nz,j,rstart,rend; 3611 PetscScalar *vals,*buf; 3612 MPI_Comm comm = ((PetscObject)viewer)->comm; 3613 MPI_Status status; 3614 PetscMPIInt rank,size,maxnz; 3615 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3616 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3617 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3618 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3619 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3620 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3621 3622 PetscFunctionBegin; 3623 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3624 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3625 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3626 3627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3628 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3629 if (!rank) { 3630 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3631 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3632 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3633 } 3634 3635 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3636 3637 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3638 M = header[1]; N = header[2]; 3639 3640 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3641 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3642 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3643 3644 /* If global sizes are set, check if they are consistent with that given in the file */ 3645 if (sizesset) { 3646 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3647 } 3648 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3649 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3650 3651 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3652 3653 /* 3654 This code adds extra rows to make sure the number of rows is 3655 divisible by the blocksize 3656 */ 3657 Mbs = M/bs; 3658 extra_rows = bs - M + bs*Mbs; 3659 if (extra_rows == bs) extra_rows = 0; 3660 else Mbs++; 3661 if (extra_rows && !rank) { 3662 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 
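    /* Illustrative numbers only: M = 10 with bs = 3 gives Mbs = 3 and extra_rows = 2, so the
       loader pads the matrix to 12 rows (4 blocks of size 3); the padded rows receive a unit
       diagonal entry further below. */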
3663 } 3664 3665 /* determine ownership of all rows */ 3666 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3667 mbs = Mbs/size + ((Mbs % size) > rank); 3668 m = mbs*bs; 3669 } else { /* User set */ 3670 m = newmat->rmap->n; 3671 mbs = m/bs; 3672 } 3673 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3674 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3675 3676 /* process 0 needs enough room for process with most rows */ 3677 if (!rank) { 3678 mmax = rowners[1]; 3679 for (i=2; i<size; i++) { 3680 mmax = PetscMax(mmax,rowners[i]); 3681 } 3682 mmax*=bs; 3683 } else mmax = m; 3684 3685 rowners[0] = 0; 3686 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3687 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3688 rstart = rowners[rank]; 3689 rend = rowners[rank+1]; 3690 3691 /* distribute row lengths to all processors */ 3692 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3693 if (!rank) { 3694 mend = m; 3695 if (size == 1) mend = mend - extra_rows; 3696 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3697 for (j=mend; j<m; j++) locrowlens[j] = 1; 3698 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3699 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3700 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3701 for (j=0; j<m; j++) { 3702 procsnz[0] += locrowlens[j]; 3703 } 3704 for (i=1; i<size; i++) { 3705 mend = browners[i+1] - browners[i]; 3706 if (i == size-1) mend = mend - extra_rows; 3707 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3708 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3709 /* calculate the number of nonzeros on each processor */ 3710 for (j=0; j<browners[i+1]-browners[i]; j++) { 3711 procsnz[i] += rowlengths[j]; 3712 } 3713 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3714 } 3715 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3716 } else { 3717 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3718 } 3719 3720 if (!rank) { 3721 /* determine max buffer needed and allocate it */ 3722 maxnz = procsnz[0]; 3723 for (i=1; i<size; i++) { 3724 maxnz = PetscMax(maxnz,procsnz[i]); 3725 } 3726 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3727 3728 /* read in my part of the matrix column indices */ 3729 nz = procsnz[0]; 3730 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3731 mycols = ibuf; 3732 if (size == 1) nz -= extra_rows; 3733 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3734 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3735 3736 /* read in every ones (except the last) and ship off */ 3737 for (i=1; i<size-1; i++) { 3738 nz = procsnz[i]; 3739 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3740 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3741 } 3742 /* read in the stuff for the last proc */ 3743 if (size != 1) { 3744 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3745 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3746 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3747 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3748 } 3749 ierr = PetscFree(cols);CHKERRQ(ierr); 3750 } else { 3751 /* determine buffer space needed for message */ 3752 nz = 0; 3753 for (i=0; i<m; i++) { 3754 nz += locrowlens[i]; 3755 } 3756 ierr = 
PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3757 mycols = ibuf; 3758 /* receive message of column indices*/ 3759 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3760 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3761 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3762 } 3763 3764 /* loop over local rows, determining number of off diagonal entries */ 3765 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3766 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3767 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3768 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3769 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3770 rowcount = 0; nzcount = 0; 3771 for (i=0; i<mbs; i++) { 3772 dcount = 0; 3773 odcount = 0; 3774 for (j=0; j<bs; j++) { 3775 kmax = locrowlens[rowcount]; 3776 for (k=0; k<kmax; k++) { 3777 tmp = mycols[nzcount++]/bs; 3778 if (!mask[tmp]) { 3779 mask[tmp] = 1; 3780 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3781 else masked1[dcount++] = tmp; 3782 } 3783 } 3784 rowcount++; 3785 } 3786 3787 dlens[i] = dcount; 3788 odlens[i] = odcount; 3789 3790 /* zero out the mask elements we set */ 3791 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3792 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3793 } 3794 3795 3796 if (!sizesset) { 3797 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3798 } 3799 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3800 3801 if (!rank) { 3802 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3803 /* read in my part of the matrix numerical values */ 3804 nz = procsnz[0]; 3805 vals = buf; 3806 mycols = ibuf; 3807 if (size == 1) nz -= extra_rows; 3808 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3809 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3810 3811 /* insert into matrix */ 3812 jj = rstart*bs; 3813 for (i=0; i<m; i++) { 3814 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3815 mycols += locrowlens[i]; 3816 vals += locrowlens[i]; 3817 jj++; 3818 } 3819 /* read in other processors (except the last one) and ship out */ 3820 for (i=1; i<size-1; i++) { 3821 nz = procsnz[i]; 3822 vals = buf; 3823 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3824 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3825 } 3826 /* the last proc */ 3827 if (size != 1){ 3828 nz = procsnz[i] - extra_rows; 3829 vals = buf; 3830 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3831 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3832 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3833 } 3834 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3835 } else { 3836 /* receive numeric values */ 3837 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3838 3839 /* receive message of values*/ 3840 vals = buf; 3841 mycols = ibuf; 3842 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3843 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3844 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3845 3846 /* insert into matrix */ 3847 jj = rstart*bs; 3848 
for (i=0; i<m; i++) { 3849 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3850 mycols += locrowlens[i]; 3851 vals += locrowlens[i]; 3852 jj++; 3853 } 3854 } 3855 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3856 ierr = PetscFree(buf);CHKERRQ(ierr); 3857 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3858 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3859 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3860 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3861 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3862 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3863 3864 PetscFunctionReturn(0); 3865 } 3866 3867 #undef __FUNCT__ 3868 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3869 /*@ 3870 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3871 3872 Input Parameters: 3873 . mat - the matrix 3874 . fact - factor 3875 3876 Collective on Mat 3877 3878 Level: advanced 3879 3880 Notes: 3881 This can also be set by the command line option: -mat_use_hash_table <fact> 3882 3883 .keywords: matrix, hashtable, factor, HT 3884 3885 .seealso: MatSetOption() 3886 @*/ 3887 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3888 { 3889 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3890 3891 PetscFunctionBegin; 3892 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3893 if (f) { 3894 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3895 } 3896 PetscFunctionReturn(0); 3897 } 3898 3899 EXTERN_C_BEGIN 3900 #undef __FUNCT__ 3901 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3902 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3903 { 3904 Mat_MPIBAIJ *baij; 3905 3906 PetscFunctionBegin; 3907 baij = (Mat_MPIBAIJ*)mat->data; 3908 baij->ht_fact = fact; 3909 PetscFunctionReturn(0); 3910 } 3911 EXTERN_C_END 3912 3913 #undef __FUNCT__ 3914 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3915 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3916 { 3917 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3918 PetscFunctionBegin; 3919 *Ad = a->A; 3920 *Ao = a->B; 3921 *colmap = a->garray; 3922 PetscFunctionReturn(0); 3923 } 3924 3925 /* 3926 Special version for direct calls from Fortran (to eliminate two function call overheads 3927 */ 3928 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3929 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3930 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3931 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3932 #endif 3933 3934 #undef __FUNCT__ 3935 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3936 /*@C 3937 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3938 3939 Collective on Mat 3940 3941 Input Parameters: 3942 + mat - the matrix 3943 . min - number of input rows 3944 . im - input rows 3945 . nin - number of input columns 3946 . in - input columns 3947 . v - numerical values input 3948 - addvin - INSERT_VALUES or ADD_VALUES 3949 3950 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
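
   From C code one would normally not call this routine; the standard interface is used
   instead, e.g. (a sketch only, with hypothetical row and column block indices):
.vb
      ierr = MatSetValuesBlocked(mat,1,&row,1,&col,values,ADD_VALUES);CHKERRQ(ierr);
.ve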
3951 3952 Level: advanced 3953 3954 .seealso: MatSetValuesBlocked() 3955 @*/ 3956 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3957 { 3958 /* convert input arguments to C version */ 3959 Mat mat = *matin; 3960 PetscInt m = *min, n = *nin; 3961 InsertMode addv = *addvin; 3962 3963 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3964 const MatScalar *value; 3965 MatScalar *barray=baij->barray; 3966 PetscTruth roworiented = baij->roworiented; 3967 PetscErrorCode ierr; 3968 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3969 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3970 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3971 3972 PetscFunctionBegin; 3973 /* tasks normally handled by MatSetValuesBlocked() */ 3974 if (mat->insertmode == NOT_SET_VALUES) { 3975 mat->insertmode = addv; 3976 } 3977 #if defined(PETSC_USE_DEBUG) 3978 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3979 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3980 #endif 3981 if (mat->assembled) { 3982 mat->was_assembled = PETSC_TRUE; 3983 mat->assembled = PETSC_FALSE; 3984 } 3985 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3986 3987 3988 if(!barray) { 3989 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3990 baij->barray = barray; 3991 } 3992 3993 if (roworiented) { 3994 stepval = (n-1)*bs; 3995 } else { 3996 stepval = (m-1)*bs; 3997 } 3998 for (i=0; i<m; i++) { 3999 if (im[i] < 0) continue; 4000 #if defined(PETSC_USE_DEBUG) 4001 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4002 #endif 4003 if (im[i] >= rstart && im[i] < rend) { 4004 row = im[i] - rstart; 4005 for (j=0; j<n; j++) { 4006 /* If NumCol = 1 then a copy is not required */ 4007 if ((roworiented) && (n == 1)) { 4008 barray = (MatScalar*)v + i*bs2; 4009 } else if((!roworiented) && (m == 1)) { 4010 barray = (MatScalar*)v + j*bs2; 4011 } else { /* Here a copy is required */ 4012 if (roworiented) { 4013 value = v + i*(stepval+bs)*bs + j*bs; 4014 } else { 4015 value = v + j*(stepval+bs)*bs + i*bs; 4016 } 4017 for (ii=0; ii<bs; ii++,value+=stepval) { 4018 for (jj=0; jj<bs; jj++) { 4019 *barray++ = *value++; 4020 } 4021 } 4022 barray -=bs2; 4023 } 4024 4025 if (in[j] >= cstart && in[j] < cend){ 4026 col = in[j] - cstart; 4027 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4028 } 4029 else if (in[j] < 0) continue; 4030 #if defined(PETSC_USE_DEBUG) 4031 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4032 #endif 4033 else { 4034 if (mat->was_assembled) { 4035 if (!baij->colmap) { 4036 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4037 } 4038 4039 #if defined(PETSC_USE_DEBUG) 4040 #if defined (PETSC_USE_CTABLE) 4041 { PetscInt data; 4042 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4043 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4044 } 4045 #else 4046 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4047 #endif 4048 #endif 4049 #if defined (PETSC_USE_CTABLE) 4050 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4051 col = (col - 1)/bs; 4052 #else 
4053 col = (baij->colmap[in[j]] - 1)/bs; 4054 #endif 4055 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4056 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4057 col = in[j]; 4058 } 4059 } 4060 else col = in[j]; 4061 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4062 } 4063 } 4064 } else { 4065 if (!baij->donotstash) { 4066 if (roworiented) { 4067 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4068 } else { 4069 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4070 } 4071 } 4072 } 4073 } 4074 4075 /* task normally handled by MatSetValuesBlocked() */ 4076 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4077 PetscFunctionReturn(0); 4078 } 4079
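
/*
   A minimal usage sketch for the binary loader implemented above.  The file name, the block
   size and the exact MatLoad() calling sequence are assumptions for this PETSc version and
   are not defined in this file:

      PetscViewer viewer;
      Mat         A;
      ierr = PetscOptionsSetValue("-matload_block_size","3");CHKERRQ(ierr);
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/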