#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
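/*
   Usage sketch (illustrative only; MatStoreValues()/MatRetrieveValues() are the public
   entry points dispatched to the two routines above).  With the nonzero pattern frozen,
   a caller can snapshot the numerical values and restore them later, e.g. to rebuild a
   preconditioner with an unchanged sparsity pattern:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      [snapshot current values]
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   [restore the snapshot]
*/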
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
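/*
   Worked example (hypothetical numbers): with bs = 2 and garray = {3,7} -- i.e. the
   off-diagonal block references global block columns 3 and 7 -- the loop above stores
   colmap[3] = 0*2+1 = 1 and colmap[7] = 1*2+1 = 3.  A lookup thus returns the local
   point-column offset plus one, with 0 meaning "global block column not present";
   the callers below subtract the one after each lookup.
*/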
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
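/*
   Usage sketch (illustrative only): callers pass global point indices and the routine
   above splits each entry three ways -- diagonal block (baij->A), off-diagonal block
   (baij->B, after a colmap lookup), or the stash for off-process rows, which is drained
   during assembly:

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/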
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
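/*
   Worked example of the multiplicative hash above (hypothetical numbers): HASH_KEY is
   the fractional part of the golden ratio, so with key = 10 and size = 8 one gets
   tmp = 10*0.6180339887 = 6.180339887, fractional part 0.180339887, and slot
   (PetscInt)(8*0.180339887) = 1.  Consecutive keys therefore spread nearly uniformly
   over the table even when the keys themselves are regularly strided.
*/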
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 662 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 663 v = amat->a; jj = amat->j; 664 for (i=0; i<amat->nz; i++) { 665 for (j=0; j<bs; j++){ 666 col = bs*(cstart + *jj) + j; /* column index */ 667 for (row=0; row<bs; row++){ 668 tmp[col] += PetscAbsScalar(*v); v++; 669 } 670 } 671 jj++; 672 } 673 v = bmat->a; jj = bmat->j; 674 for (i=0; i<bmat->nz; i++) { 675 for (j=0; j<bs; j++){ 676 col = bs*garray[*jj] + j; 677 for (row=0; row<bs; row++){ 678 tmp[col] += PetscAbsScalar(*v); v++; 679 } 680 } 681 jj++; 682 } 683 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 684 *nrm = 0.0; 685 for (j=0; j<mat->cmap->N; j++) { 686 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 687 } 688 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY) { /* max row sum */ 690 PetscReal *sums; 691 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 692 sum = 0.0; 693 for (j=0; j<amat->mbs; j++) { 694 for (row=0; row<bs; row++) sums[row] = 0.0; 695 v = amat->a + bs2*amat->i[j]; 696 nz = amat->i[j+1]-amat->i[j]; 697 for (i=0; i<nz; i++) { 698 for (col=0; col<bs; col++){ 699 for (row=0; row<bs; row++){ 700 sums[row] += PetscAbsScalar(*v); v++; 701 } 702 } 703 } 704 v = bmat->a + bs2*bmat->i[j]; 705 nz = bmat->i[j+1]-bmat->i[j]; 706 for (i=0; i<nz; i++) { 707 for (col=0; col<bs; 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
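/*
   Probing example (hypothetical numbers): with ht_size = 5, a key hashing to h1 = 3 is
   placed in slot 3 if that slot is free, otherwise the loop above tries slots 4, 0, 1, 2
   -- open addressing with linear probing, wrapping via (h1+k)%ht_size.  The lookups in
   MatSetValues_MPIBAIJ_HT() and MatSetValuesBlocked_MPIBAIJ_HT() walk the same sequence
   (h1..size-1, then 0..h1-1), so a stored key is always found along its probe path.
*/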
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
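/*
   Note on the MPI_BOR reduction above (enum values from petscsys.h): INSERT_VALUES == 1
   and ADD_VALUES == 2, so OR-ing the insert modes of all ranks yields 1 or 2 when every
   rank agrees, and exactly 3 == (ADD_VALUES|INSERT_VALUES) when some ranks inserted
   while others added -- the mixed-mode error the test catches.
*/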
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
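/*
   The multiply kernels above and the transpose kernels below share one split: with xg
   the ghost values of xx gathered through a->Mvctx,

       yy = a->A * xx(local) + a->B * xg

   where a->A holds the diagonal block and a->B the off-diagonal block.  The scatter is
   started before the local product so the communication overlaps the computation on a->A.
*/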
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
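/*
   Merge illustration for MatGetRow_MPIBAIJ() above (hypothetical numbers): columns of B
   below the diagonal block precede it and the rest follow it, so a row whose A part has
   global columns {10,11} and whose B part has {2,40} is returned as {2,10,11,40} -- the
   leading B entries up to imark, then the A part shifted by cstart*bs, then the tail of
   the B part, each piece already sorted on its own.
*/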
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
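/*
   Usage sketch (illustrative only): the reuse flag selects between building a new matrix
   and transposing in place; the in-place form is only legal for square matrices, as
   checked above.

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   [new matrix At]
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      [in place]
*/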
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
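/*
   Communication pattern in MatZeroRows_MPIBAIJ() below: each rank tallies, per owner,
   the rows it wants zeroed; PetscMaxSum() tells every rank how many messages to expect
   and their maximum length; receives are posted before the sends; and the received
   global row numbers are shifted by the ownership base to local numbering before the
   actual zeroing.
*/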
nprocs[2*i-2];} 1607 count = 0; 1608 for (i=0; i<size; i++) { 1609 if (nprocs[2*i+1]) { 1610 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1611 } 1612 } 1613 ierr = PetscFree(starts);CHKERRQ(ierr); 1614 1615 base = owners[rank]; 1616 1617 /* wait on receives */ 1618 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1619 count = nrecvs; 1620 slen = 0; 1621 while (count) { 1622 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1623 /* unpack receives into our local space */ 1624 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1625 source[imdex] = recv_status.MPI_SOURCE; 1626 lens[imdex] = n; 1627 slen += n; 1628 count--; 1629 } 1630 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1631 1632 /* move the data into the send scatter */ 1633 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1634 count = 0; 1635 for (i=0; i<nrecvs; i++) { 1636 values = rvalues + i*nmax; 1637 for (j=0; j<lens[i]; j++) { 1638 lrows[count++] = values[j] - base; 1639 } 1640 } 1641 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1642 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1643 ierr = PetscFree(owner);CHKERRQ(ierr); 1644 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1645 1646 /* actually zap the local rows */ 1647 /* 1648 Zero the required rows. If the "diagonal block" of the matrix 1649 is square and the user wishes to set the diagonal we use separate 1650 code so that MatSetValues() is not called for each diagonal allocating 1651 new memory, thus calling lots of mallocs and slowing things down. 1652 1653 */ 1654 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1655 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1656 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1657 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1658 } else if (diag != 0.0) { 1659 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1660 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1661 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1662 for (i=0; i<slen; i++) { 1663 row = lrows[i] + rstart_bs; 1664 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1665 } 1666 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1667 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1668 } else { 1669 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1670 } 1671 1672 ierr = PetscFree(lrows);CHKERRQ(ierr); 1673 1674 /* wait on sends */ 1675 if (nsends) { 1676 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1677 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1678 ierr = PetscFree(send_status);CHKERRQ(ierr); 1679 } 1680 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1681 ierr = PetscFree(svalues);CHKERRQ(ierr); 1682 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1688 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1689 { 1690 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 1693 PetscFunctionBegin; 1694 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ 
"MatEqual_MPIBAIJ" 1702 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1703 { 1704 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1705 Mat a,b,c,d; 1706 PetscTruth flg; 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 a = matA->A; b = matA->B; 1711 c = matB->A; d = matB->B; 1712 1713 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1714 if (flg) { 1715 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1716 } 1717 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatCopy_MPIBAIJ" 1723 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1724 { 1725 PetscErrorCode ierr; 1726 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1727 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1728 1729 PetscFunctionBegin; 1730 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1731 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1732 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1733 } else { 1734 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1735 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1736 } 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1742 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1743 { 1744 PetscErrorCode ierr; 1745 1746 PetscFunctionBegin; 1747 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1748 PetscFunctionReturn(0); 1749 } 1750 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1753 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1754 { 1755 PetscErrorCode ierr; 1756 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1757 PetscBLASInt bnz,one=1; 1758 Mat_SeqBAIJ *x,*y; 1759 1760 PetscFunctionBegin; 1761 if (str == SAME_NONZERO_PATTERN) { 1762 PetscScalar alpha = a; 1763 x = (Mat_SeqBAIJ *)xx->A->data; 1764 y = (Mat_SeqBAIJ *)yy->A->data; 1765 bnz = PetscBLASIntCast(x->nz); 1766 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1767 x = (Mat_SeqBAIJ *)xx->B->data; 1768 y = (Mat_SeqBAIJ *)yy->B->data; 1769 bnz = PetscBLASIntCast(x->nz); 1770 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1771 } else { 1772 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1773 } 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1779 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1780 { 1781 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1782 PetscInt rbs,cbs; 1783 PetscErrorCode ierr; 1784 1785 PetscFunctionBegin; 1786 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1787 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1788 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1789 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1790 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1791 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1797 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1798 { 1799 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1800 PetscErrorCode ierr; 1801 1802 PetscFunctionBegin; 1803 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1804 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1805 
PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1810 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1811 { 1812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1813 PetscErrorCode ierr; 1814 1815 PetscFunctionBegin; 1816 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1817 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1823 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1824 { 1825 PetscErrorCode ierr; 1826 IS iscol_local; 1827 PetscInt csize; 1828 1829 PetscFunctionBegin; 1830 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1831 if (call == MAT_REUSE_MATRIX) { 1832 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1833 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1834 } else { 1835 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1836 } 1837 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1838 if (call == MAT_INITIAL_MATRIX) { 1839 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1840 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1841 } 1842 PetscFunctionReturn(0); 1843 } 1844 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 1847 /* 1848 Not great since it makes two copies of the submatrix, first a SeqBAIJ 1849 on each process and then the end result by concatenating the local matrices. 1850 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1851 */ 1852 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1853 { 1854 PetscErrorCode ierr; 1855 PetscMPIInt rank,size; 1856 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1857 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1858 Mat *local,M,Mreuse; 1859 MatScalar *vwork,*aa; 1860 MPI_Comm comm = ((PetscObject)mat)->comm; 1861 Mat_SeqBAIJ *aij; 1862 1863 1864 PetscFunctionBegin; 1865 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1866 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1867 1868 if (call == MAT_REUSE_MATRIX) { 1869 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1870 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1871 local = &Mreuse; 1872 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1873 } else { 1874 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1875 Mreuse = *local; 1876 ierr = PetscFree(local);CHKERRQ(ierr); 1877 } 1878 1879 /* 1880 m - number of local rows 1881 n - number of columns (same on all processors) 1882 rstart - first row in new global matrix generated 1883 */ 1884 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1885 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1886 m = m/bs; 1887 n = n/bs; 1888 1889 if (call == MAT_INITIAL_MATRIX) { 1890 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1891 ii = aij->i; 1892 jj = aij->j; 1893 1894 /* 1895 Determine the number of non-zeros in the diagonal and off-diagonal 1896 portions of the matrix in order to do correct preallocation 1897 */ 1898 1899 /* first get start and end of "diagonal" columns */ 1900 if (csize == PETSC_DECIDE) { 1901 ierr = 
ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1902 if (mglobal == n*bs) { /* square matrix */ 1903 nlocal = m; 1904 } else { 1905 nlocal = n/size + ((n % size) > rank); 1906 } 1907 } else { 1908 nlocal = csize/bs; 1909 } 1910 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1911 rstart = rend - nlocal; 1912 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1913 1914 /* next, compute all the lengths */ 1915 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1916 olens = dlens + m; 1917 for (i=0; i<m; i++) { 1918 jend = ii[i+1] - ii[i]; 1919 olen = 0; 1920 dlen = 0; 1921 for (j=0; j<jend; j++) { 1922 if (*jj < rstart || *jj >= rend) olen++; 1923 else dlen++; 1924 jj++; 1925 } 1926 olens[i] = olen; 1927 dlens[i] = dlen; 1928 } 1929 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1930 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1931 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1932 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1933 ierr = PetscFree(dlens);CHKERRQ(ierr); 1934 } else { 1935 PetscInt ml,nl; 1936 1937 M = *newmat; 1938 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1939 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1940 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1941 /* 1942 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 1943 rather than the slower MatSetValues(). 1944 */ 1945 M->was_assembled = PETSC_TRUE; 1946 M->assembled = PETSC_FALSE; 1947 } 1948 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1949 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1950 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1951 ii = aij->i; 1952 jj = aij->j; 1953 aa = aij->a; 1954 for (i=0; i<m; i++) { 1955 row = rstart/bs + i; 1956 nz = ii[i+1] - ii[i]; 1957 cwork = jj; jj += nz; 1958 vwork = aa; aa += nz; 1959 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1960 } 1961 1962 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1963 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 *newmat = M; 1965 1966 /* save submatrix used in processor for next request */ 1967 if (call == MAT_INITIAL_MATRIX) { 1968 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1969 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1970 } 1971 1972 PetscFunctionReturn(0); 1973 } 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatPermute_MPIBAIJ" 1977 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1978 { 1979 MPI_Comm comm,pcomm; 1980 PetscInt first,local_size,nrows; 1981 const PetscInt *rows; 1982 PetscMPIInt size; 1983 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1984 PetscErrorCode ierr; 1985 1986 PetscFunctionBegin; 1987 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1988 /* make a collective version of 'rowp' */ 1989 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1990 if (pcomm==comm) { 1991 crowp = rowp; 1992 } else { 1993 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1994 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1995 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1996 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1997 } 1998 /* collect the global row permutation and invert it */ 1999 ierr = 
ISAllGather(crowp,&growp);CHKERRQ(ierr); 2000 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2001 if (pcomm!=comm) { 2002 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2003 } 2004 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2005 /* get the local target indices */ 2006 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2007 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2008 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2009 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2010 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2011 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2012 /* the column permutation is so much easier; 2013 make a local version of 'colp' and invert it */ 2014 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2015 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2016 if (size==1) { 2017 lcolp = colp; 2018 } else { 2019 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2020 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2021 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2022 } 2023 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2024 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2025 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2026 if (size>1) { 2027 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2028 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2029 } 2030 /* now we just get the submatrix */ 2031 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2032 /* clean up */ 2033 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2034 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2040 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2041 { 2042 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2043 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2044 2045 PetscFunctionBegin; 2046 if (nghosts) { *nghosts = B->nbs;} 2047 if (ghosts) {*ghosts = baij->garray;} 2048 PetscFunctionReturn(0); 2049 } 2050 2051 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2052 2053 #undef __FUNCT__ 2054 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2055 /* 2056 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2057 */ 2058 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2059 { 2060 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2061 PetscErrorCode ierr; 2062 PetscMPIInt size,*ncolsonproc,*disp,nn; 2063 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2064 const PetscInt *is; 2065 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2066 PetscInt *rowhit,M,cstart,cend,colb; 2067 PetscInt *columnsforrow,l; 2068 IS *isa; 2069 PetscTruth done,flg; 2070 ISLocalToGlobalMapping map = mat->bmapping; 2071 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2072 2073 PetscFunctionBegin; 2074 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2075 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2076 2077 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2078 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2079 M = mat->rmap->n/bs; 2080 cstart = mat->cmap->rstart/bs; 2081 cend = mat->cmap->rend/bs; 2082 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2083 c->N = mat->cmap->N/bs; 2084 c->m = mat->rmap->n/bs; 2085 c->rstart = mat->rmap->rstart/bs; 2086 2087 c->ncolors = nis; 2088 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2089 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2090 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2091 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2092 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2093 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2094 2095 /* Allow access to data structures of local part of matrix */ 2096 if (!baij->colmap) { 2097 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2098 } 2099 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2100 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2101 2102 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2103 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2104 2105 for (i=0; i<nis; i++) { 2106 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2107 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2108 c->ncolumns[i] = n; 2109 if (n) { 2110 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2111 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2112 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2113 } else { 2114 c->columns[i] = 0; 2115 } 2116 2117 if (ctype == IS_COLORING_GLOBAL){ 2118 /* Determine the total (parallel) number of columns of this color */ 2119 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2120 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2121 2122 nn = PetscMPIIntCast(n); 2123 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2124 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2125 if (!nctot) { 2126 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2127 } 2128 2129 disp[0] = 0; 2130 for (j=1; j<size; j++) { 2131 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2132 } 2133 2134 /* Get complete list of columns for color on each processor */ 2135 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2136 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2137 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2138 } else if (ctype == IS_COLORING_GHOSTED){ 2139 /* Determine local number of columns of this color on this process, including ghost points */ 2140 nctot = n; 2141 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2142 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2143 } else { 2144 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2145 } 2146 2147 /* 2148 Mark all rows affected by these columns 2149 */ 2150 /* Temporary option to allow for debugging/testing */ 2151 flg = PETSC_FALSE; 2152 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2153 if (!flg) {/*-----------------------------------------------------------------------------*/ 2154 /* crude, fast version */ 2155 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2156 /* loop over columns*/ 2157 for (j=0; j<nctot; j++) { 2158 if (ctype == IS_COLORING_GHOSTED) { 2159 col = ltog[cols[j]]; 2160 } else { 2161 col = cols[j]; 2162 } 2163 if (col >= cstart && col < cend) { 2164 /* column is in diagonal block of matrix */ 2165 rows = A_cj + A_ci[col-cstart]; 2166 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2167 } else { 2168 #if defined (PETSC_USE_CTABLE) 2169 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2170 colb --; 2171 #else 2172 colb = baij->colmap[col] - 1; 2173 #endif 2174 if (colb == -1) { 2175 m = 0; 2176 } else { 2177 colb = colb/bs; 2178 rows = B_cj + B_ci[colb]; 2179 m = B_ci[colb+1] - B_ci[colb]; 2180 } 2181 } 2182 /* loop over the rows hit by this column, marking them in rowhit */ 2183 for (k=0; k<m; k++) { 2184 rowhit[*rows++] = col + 1; 2185 } 2186 } 2187 2188 /* count the number of hits */ 2189 nrows = 0; 2190 for (j=0; j<M; j++) { 2191 if (rowhit[j]) nrows++; 2192 } 2193 c->nrows[i] = nrows; 2194 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2195 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2196 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2197 nrows = 0; 2198 for (j=0; j<M; j++) { 2199 if (rowhit[j]) { 2200 c->rows[i][nrows] = j; 2201 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2202 nrows++; 2203 } 2204 } 2205 } else {/*-------------------------------------------------------------------------------*/ 2206 /* slow version, using rowhit as a linked list */ 2207 PetscInt currentcol,fm,mfm; 2208 rowhit[M] = M; 2209 nrows = 0; 2210 /* loop over columns*/ 2211 for (j=0; j<nctot; j++) { 2212 if (ctype == IS_COLORING_GHOSTED) { 2213 col = ltog[cols[j]]; 2214 } else { 2215 col = cols[j]; 2216 } 2217 if (col >= cstart && col < cend) { 2218 /* column is in diagonal block of matrix */ 2219 rows = A_cj + A_ci[col-cstart]; 2220 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2221 } else { 2222 #if defined (PETSC_USE_CTABLE) 2223 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2224 colb --; 2225 #else 2226 colb = baij->colmap[col] - 1; 2227 #endif 2228 if (colb == -1) { 2229 m = 0; 2230 } else { 2231 colb = colb/bs; 2232 rows = B_cj + B_ci[colb]; 2233 m = B_ci[colb+1] - B_ci[colb]; 2234 } 2235 } 2236 2237 /* loop over the rows hit by this column, marking them in rowhit */ 2238 fm = M; /* fm points to first entry in linked list */ 2239 for (k=0; k<m; k++) { 2240 currentcol = *rows++; 2241 /* is it already in the list? 
*/ 2242 do { 2243 mfm = fm; 2244 fm = rowhit[fm]; 2245 } while (fm < currentcol); 2246 /* not in list so add it */ 2247 if (fm != currentcol) { 2248 nrows++; 2249 columnsforrow[currentcol] = col; 2250 /* next three lines insert new entry into linked list */ 2251 rowhit[mfm] = currentcol; 2252 rowhit[currentcol] = fm; 2253 fm = currentcol; 2254 /* fm points to present position in list since we know the columns are sorted */ 2255 } else { 2256 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2257 } 2258 } 2259 } 2260 c->nrows[i] = nrows; 2261 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2262 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2263 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2264 /* now store the linked list of rows into c->rows[i] */ 2265 nrows = 0; 2266 fm = rowhit[M]; 2267 do { 2268 c->rows[i][nrows] = fm; 2269 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2270 fm = rowhit[fm]; 2271 } while (fm < M); 2272 } /* ---------------------------------------------------------------------------------------*/ 2273 ierr = PetscFree(cols);CHKERRQ(ierr); 2274 } 2275 2276 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2277 /* 2278 vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones 2279 */ 2280 if (ctype == IS_COLORING_GLOBAL) { 2281 PetscInt *garray; 2282 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2283 for (i=0; i<baij->B->cmap->n/bs; i++) { 2284 for (j=0; j<bs; j++) { 2285 garray[i*bs+j] = bs*baij->garray[i]+j; 2286 } 2287 } 2288 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2289 ierr = PetscFree(garray);CHKERRQ(ierr); 2290 CHKMEMQ; 2291 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2292 for (k=0; k<c->ncolors; k++) { 2293 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2294 for (l=0; l<c->nrows[k]; l++) { 2295 col = c->columnsforrow[k][l]; 2296 if (col >= cstart && col < cend) { 2297 /* column is in diagonal block of matrix */ 2298 colb = col - cstart; 2299 } else { 2300 /* column is in "off-processor" part */ 2301 #if defined (PETSC_USE_CTABLE) 2302 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2303 colb --; 2304 #else 2305 colb = baij->colmap[col] - 1; 2306 #endif 2307 colb = colb/bs; 2308 colb += cend - cstart; 2309 } 2310 c->vscaleforrow[k][l] = colb; 2311 } 2312 } 2313 } else if (ctype == IS_COLORING_GHOSTED) { 2314 /* Get gtol mapping */ 2315 PetscInt N = mat->cmap->N, *gtol; 2316 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2317 for (i=0; i<N; i++) gtol[i] = -1; 2318 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2319 2320 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2321 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2322 for (k=0; k<c->ncolors; k++) { 2323 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2324 for (l=0; l<c->nrows[k]; l++) { 2325 col = c->columnsforrow[k][l]; /* global column index */ 2326 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2327 } 2328 } 2329 ierr = PetscFree(gtol);CHKERRQ(ierr); 2330 } 2331 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2332 2333 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2334 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2335 
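/* release the column-oriented (compressed column) views of the diagonal and off-diagonal blocks obtained with MatGetColumnIJ() above */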
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2336 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2337 CHKMEMQ; 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2343 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2344 { 2345 Mat B; 2346 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2347 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2348 Mat_SeqAIJ *b; 2349 PetscErrorCode ierr; 2350 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2351 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2352 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2353 2354 PetscFunctionBegin; 2355 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2356 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2357 2358 /* ---------------------------------------------------------------- 2359 Tell every processor the number of nonzeros per row 2360 */ 2361 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2362 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2363 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2364 } 2365 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2366 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2367 displs = recvcounts + size; 2368 for (i=0; i<size; i++) { 2369 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2370 displs[i] = A->rmap->range[i]/bs; 2371 } 2372 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2373 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2374 #else 2375 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2376 #endif 2377 /* --------------------------------------------------------------- 2378 Create the sequential matrix of the same type as the local block diagonal 2379 */ 2380 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2381 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2382 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2383 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2384 b = (Mat_SeqAIJ *)B->data; 2385 2386 /*-------------------------------------------------------------------- 2387 Copy my part of matrix column indices over 2388 */ 2389 sendcount = ad->nz + bd->nz; 2390 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2391 a_jsendbuf = ad->j; 2392 b_jsendbuf = bd->j; 2393 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2394 cnt = 0; 2395 for (i=0; i<n; i++) { 2396 2397 /* put in lower diagonal portion */ 2398 m = bd->i[i+1] - bd->i[i]; 2399 while (m > 0) { 2400 /* is it above diagonal (in bd (compressed) numbering) */ 2401 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2402 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2403 m--; 2404 } 2405 2406 /* put in diagonal portion */ 2407 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2408 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2409 } 2410 2411 /* put in upper diagonal portion */ 2412 while (m-- > 0) { 2413 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2414 } 2415 } 2416 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2417 2418 /*-------------------------------------------------------------------- 2419 Gather all column indices to all processors 2420 */ 2421 for (i=0; i<size; i++) { 2422 recvcounts[i] = 0; 2423 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2424 recvcounts[i] += lens[j]; 2425 } 2426 } 2427 displs[0] = 0; 2428 for (i=1; i<size; i++) { 2429 displs[i] = displs[i-1] + recvcounts[i-1]; 2430 } 2431 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2432 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2433 #else 2434 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2435 #endif 2436 /*-------------------------------------------------------------------- 2437 Assemble the matrix into useable form (note numerical values not yet set) 2438 */ 2439 /* set the b->ilen (length of each row) values */ 2440 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2441 /* set the b->i indices */ 2442 b->i[0] = 0; 2443 for (i=1; i<=A->rmap->N/bs; i++) { 2444 b->i[i] = b->i[i-1] + lens[i-1]; 2445 } 2446 ierr = PetscFree(lens);CHKERRQ(ierr); 2447 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2450 2451 if (A->symmetric){ 2452 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2453 } else if (A->hermitian) { 2454 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2455 } else if (A->structurally_symmetric) { 2456 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2457 } 2458 *newmat = B; 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatSOR_MPIBAIJ" 2464 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2465 { 2466 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2467 PetscErrorCode ierr; 2468 Vec bb1 = 0; 2469 2470 PetscFunctionBegin; 2471 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2472 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2473 } 2474 2475 if (flag == SOR_APPLY_UPPER) { 2476 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2481 if (flag & SOR_ZERO_INITIAL_GUESS) { 2482 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2483 its--; 2484 } 2485 2486 while (its--) { 2487 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2488 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2489 2490 /* update rhs: bb1 = bb - B*x */ 2491 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2492 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2493 2494 /* local sweep */ 2495 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2496 } 2497 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2498 if (flag & SOR_ZERO_INITIAL_GUESS) { 2499 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2500 its--; 2501 } 2502 while (its--) { 2503 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2504 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2505 2506 /* update rhs: bb1 = bb - B*x */ 2507 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2508 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2509 2510 /* local sweep */ 2511 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2512 } 2513 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2514 if (flag & SOR_ZERO_INITIAL_GUESS) { 2515 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2516 its--; 2517 } 2518 while (its--) { 2519 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2520 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2521 2522 /* update rhs: bb1 = bb - B*x */ 2523 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2524 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2525 2526 /* local sweep */ 2527 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2528 } 2529 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2530 2531 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2532 PetscFunctionReturn(0); 2533 } 2534 2535 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2536 2537 2538 /* -------------------------------------------------------------------*/ 2539 static struct _MatOps MatOps_Values = { 2540 MatSetValues_MPIBAIJ, 2541 MatGetRow_MPIBAIJ, 2542 MatRestoreRow_MPIBAIJ, 2543 MatMult_MPIBAIJ, 2544 /* 4*/ MatMultAdd_MPIBAIJ, 2545 MatMultTranspose_MPIBAIJ, 2546 MatMultTransposeAdd_MPIBAIJ, 2547 0, 2548 0, 2549 0, 2550 /*10*/ 0, 2551 0, 2552 0, 2553 MatSOR_MPIBAIJ, 2554 MatTranspose_MPIBAIJ, 2555 /*15*/ MatGetInfo_MPIBAIJ, 2556 MatEqual_MPIBAIJ, 2557 MatGetDiagonal_MPIBAIJ, 2558 MatDiagonalScale_MPIBAIJ, 2559 MatNorm_MPIBAIJ, 2560 /*20*/ MatAssemblyBegin_MPIBAIJ, 2561 MatAssemblyEnd_MPIBAIJ, 2562 MatSetOption_MPIBAIJ, 2563 MatZeroEntries_MPIBAIJ, 2564 /*24*/ MatZeroRows_MPIBAIJ, 2565 0, 2566 0, 2567 0, 2568 0, 2569 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2570 0, 2571 0, 2572 0, 2573 0, 2574 /*34*/ MatDuplicate_MPIBAIJ, 2575 0, 2576 0, 2577 0, 2578 0, 2579 /*39*/ MatAXPY_MPIBAIJ, 2580 MatGetSubMatrices_MPIBAIJ, 2581 MatIncreaseOverlap_MPIBAIJ, 2582 MatGetValues_MPIBAIJ, 2583 MatCopy_MPIBAIJ, 2584 /*44*/ 0, 2585 MatScale_MPIBAIJ, 2586 0, 2587 0, 2588 0, 2589 /*49*/ MatSetBlockSize_MPIBAIJ, 2590 0, 2591 0, 2592 0, 2593 0, 2594 /*54*/ MatFDColoringCreate_MPIBAIJ, 2595 0, 2596 MatSetUnfactored_MPIBAIJ, 2597 MatPermute_MPIBAIJ, 2598 MatSetValuesBlocked_MPIBAIJ, 2599 /*59*/ MatGetSubMatrix_MPIBAIJ, 2600 MatDestroy_MPIBAIJ, 2601 MatView_MPIBAIJ, 2602 0, 2603 0, 2604 /*64*/ 0, 2605 0, 2606 0, 2607 0, 2608 0, 2609 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*74*/ 0, 2615 MatFDColoringApply_BAIJ, 2616 0, 2617 0, 2618 0, 2619 /*79*/ 0, 2620 0, 2621 0, 2622 0, 2623 MatLoad_MPIBAIJ, 2624 /*84*/ 0, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*89*/ 0, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*94*/ 0, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*99*/ 0, 2640 0, 2641 0, 2642 0, 2643 0, 2644 /*104*/0, 2645 MatRealPart_MPIBAIJ, 2646 MatImaginaryPart_MPIBAIJ, 2647 0, 2648 0, 2649 /*109*/0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2655 0, 2656 MatGetGhosts_MPIBAIJ 2657 }; 2658 2659 EXTERN_C_BEGIN 2660 #undef __FUNCT__ 
2661 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2662 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2663 { 2664 PetscFunctionBegin; 2665 *a = ((Mat_MPIBAIJ *)A->data)->A; 2666 *iscopy = PETSC_FALSE; 2667 PetscFunctionReturn(0); 2668 } 2669 EXTERN_C_END 2670 2671 EXTERN_C_BEGIN 2672 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2673 EXTERN_C_END 2674 2675 EXTERN_C_BEGIN 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2678 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2679 { 2680 PetscInt m,rstart,cstart,cend; 2681 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2682 const PetscInt *JJ=0; 2683 PetscScalar *values=0; 2684 PetscErrorCode ierr; 2685 2686 PetscFunctionBegin; 2687 2688 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2689 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2690 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2691 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2692 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2693 m = B->rmap->n/bs; 2694 rstart = B->rmap->rstart/bs; 2695 cstart = B->cmap->rstart/bs; 2696 cend = B->cmap->rend/bs; 2697 2698 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2699 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2700 for (i=0; i<m; i++) { 2701 nz = ii[i+1] - ii[i]; 2702 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2703 nz_max = PetscMax(nz_max,nz); 2704 JJ = jj + ii[i]; 2705 for (j=0; j<nz; j++) { 2706 if (*JJ >= cstart) break; 2707 JJ++; 2708 } 2709 d = 0; 2710 for (; j<nz; j++) { 2711 if (*JJ++ >= cend) break; 2712 d++; 2713 } 2714 d_nnz[i] = d; 2715 o_nnz[i] = nz - d; 2716 } 2717 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2718 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2719 2720 values = (PetscScalar*)V; 2721 if (!values) { 2722 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2723 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2724 } 2725 for (i=0; i<m; i++) { 2726 PetscInt row = i + rstart; 2727 PetscInt ncols = ii[i+1] - ii[i]; 2728 const PetscInt *icols = jj + ii[i]; 2729 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2730 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2731 } 2732 2733 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2734 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2735 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2736 2737 PetscFunctionReturn(0); 2738 } 2739 EXTERN_C_END 2740 2741 #undef __FUNCT__ 2742 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2743 /*@C 2744 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ (BAIJ) format 2745 (block compressed row). 2746 2747 Collective on MPI_Comm 2748 2749 Input Parameters: 2750 + B - the matrix . bs - the block size 2751 . i - the indices into j for the start of each local row (starts with zero) 2752 . 
j - the column indices for each local row (starts with zero); these must be sorted for each row 2753 - v - optional values in the matrix 2754 2755 Level: developer 2756 2757 .keywords: matrix, aij, compressed row, sparse, parallel 2758 2759 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ 2760 @*/ 2761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2762 { 2763 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2764 2765 PetscFunctionBegin; 2766 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2767 if (f) { 2768 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2769 } 2770 PetscFunctionReturn(0); 2771 } 2772 2773 EXTERN_C_BEGIN 2774 #undef __FUNCT__ 2775 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2776 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2777 { 2778 Mat_MPIBAIJ *b; 2779 PetscErrorCode ierr; 2780 PetscInt i, newbs = PetscAbs(bs); 2781 2782 PetscFunctionBegin; 2783 if (bs < 0) { 2784 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2785 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2786 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2787 bs = PetscAbs(bs); 2788 } 2789 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2790 bs = newbs; 2791 2792 2793 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2794 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2795 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2796 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2797 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2798 2799 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2800 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2801 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2802 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2803 2804 if (d_nnz) { 2805 for (i=0; i<B->rmap->n/bs; i++) { 2806 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2807 } 2808 } 2809 if (o_nnz) { 2810 for (i=0; i<B->rmap->n/bs; i++) { 2811 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2812 } 2813 } 2814 2815 b = (Mat_MPIBAIJ*)B->data; 2816 b->bs2 = bs*bs; 2817 b->mbs = B->rmap->n/bs; 2818 b->nbs = B->cmap->n/bs; 2819 b->Mbs = B->rmap->N/bs; 2820 b->Nbs = B->cmap->N/bs; 2821 2822 for (i=0; i<=b->size; i++) { 2823 b->rangebs[i] = B->rmap->range[i]/bs; 2824 } 2825 b->rstartbs = B->rmap->rstart/bs; 2826 b->rendbs = B->rmap->rend/bs; 2827 b->cstartbs = B->cmap->rstart/bs; 2828 b->cendbs = B->cmap->rend/bs; 2829 2830 if (!B->preallocated) { 2831 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2832 ierr = 
MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2833 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2834 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2835 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2836 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2837 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2838 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2839 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2840 } 2841 2842 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2843 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2844 B->preallocated = PETSC_TRUE; 2845 PetscFunctionReturn(0); 2846 } 2847 EXTERN_C_END 2848 2849 EXTERN_C_BEGIN 2850 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2851 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2852 EXTERN_C_END 2853 2854 2855 EXTERN_C_BEGIN 2856 #undef __FUNCT__ 2857 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2858 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2859 { 2860 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2861 PetscErrorCode ierr; 2862 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2863 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2864 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2865 2866 PetscFunctionBegin; 2867 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2868 ii[0] = 0; 2869 CHKMEMQ; 2870 for (i=0; i<M; i++) { 2871 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2872 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2873 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2874 /* remove one from the count if this row of the matrix has a diagonal entry */ 2875 for (j=id[i]; j<id[i+1]; j++) { 2876 if (jd[j] == i) {ii[i+1]--;break;} 2877 } 2878 CHKMEMQ; 2879 } 2880 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2881 cnt = 0; 2882 for (i=0; i<M; i++) { 2883 for (j=io[i]; j<io[i+1]; j++) { 2884 if (garray[jo[j]] > rstart) break; 2885 jj[cnt++] = garray[jo[j]]; 2886 CHKMEMQ; 2887 } 2888 for (k=id[i]; k<id[i+1]; k++) { 2889 if (jd[k] != i) { 2890 jj[cnt++] = rstart + jd[k]; 2891 CHKMEMQ; 2892 } 2893 } 2894 for (;j<io[i+1]; j++) { 2895 jj[cnt++] = garray[jo[j]]; 2896 CHKMEMQ; 2897 } 2898 } 2899 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2900 PetscFunctionReturn(0); 2901 } 2902 EXTERN_C_END 2903 2904 EXTERN_C_BEGIN 2905 #if defined(PETSC_HAVE_MUMPS) 2906 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2907 #endif 2908 EXTERN_C_END 2909 2910 /*MC 2911 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2912 2913 Options Database Keys: 2914 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2915 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2916 - -mat_use_hash_table <fact> - use a hash table during assembly, with <fact> as the table-size factor 2917 2918 Level: beginner 2919 2920 .seealso: MatCreateMPIBAIJ() 2921 M*/ 2922 2923 EXTERN_C_BEGIN 2924 #undef __FUNCT__ 2925 #define __FUNCT__ "MatCreate_MPIBAIJ" 2926 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2927 { 2928 Mat_MPIBAIJ *b; 2929 PetscErrorCode ierr; 2930 PetscTruth flg; 2931 2932 PetscFunctionBegin; 2933 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2934 B->data = (void*)b; 2935 2936 2937 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2938 B->mapping = 0; 2939 B->assembled = PETSC_FALSE; 2940 2941 B->insertmode = NOT_SET_VALUES; 2942 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2943 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2944 2945 /* build local table of row and column ownerships */ 2946 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2947 2948 /* build cache for off array entries formed */ 2949 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2950 b->donotstash = PETSC_FALSE; 2951 b->colmap = PETSC_NULL; 2952 b->garray = PETSC_NULL; 2953 b->roworiented = PETSC_TRUE; 2954 2955 /* stuff used in block assembly */ 2956 b->barray = 0; 2957 2958 /* stuff used for matrix vector multiply */ 2959 b->lvec = 0; 2960 b->Mvctx = 0; 2961 2962 /* stuff for MatGetRow() */ 2963 b->rowindices = 0; 2964 b->rowvalues = 0; 2965 b->getrowactive = PETSC_FALSE; 2966 2967 /* hash table stuff */ 2968 b->ht = 0; 2969 b->hd = 0; 2970 b->ht_size = 0; 2971 b->ht_flag = PETSC_FALSE; 2972 b->ht_fact = 0; 2973 b->ht_total_ct = 0; 2974 b->ht_insert_ct = 0; 2975 2976 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2977 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2978 if (flg) { 2979 PetscReal fact = 1.39; 2980 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2981 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2982 if (fact <= 1.0) fact = 1.39; 2983 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2984 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2985 } 2986 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2987 2988 #if defined(PETSC_HAVE_MUMPS) 2989 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 2990 #endif 2991 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 2992 "MatConvert_MPIBAIJ_MPIAdj", 2993 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2994 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2995 "MatStoreValues_MPIBAIJ", 2996 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2997 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2998 "MatRetrieveValues_MPIBAIJ", 2999 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3000 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3001 "MatGetDiagonalBlock_MPIBAIJ", 3002 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3003 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3004 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3005 
MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3006 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3007 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3008 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3009 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3010 "MatDiagonalScaleLocal_MPIBAIJ", 3011 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3013 "MatSetHashTableFactor_MPIBAIJ", 3014 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3015 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3016 PetscFunctionReturn(0); 3017 } 3018 EXTERN_C_END 3019 3020 /*MC 3021 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3022 3023 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3024 and MATMPIBAIJ otherwise. 3025 3026 Options Database Keys: 3027 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3028 3029 Level: beginner 3030 3031 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3032 M*/ 3033 3034 EXTERN_C_BEGIN 3035 #undef __FUNCT__ 3036 #define __FUNCT__ "MatCreate_BAIJ" 3037 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3038 { 3039 PetscErrorCode ierr; 3040 PetscMPIInt size; 3041 3042 PetscFunctionBegin; 3043 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3044 if (size == 1) { 3045 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3046 } else { 3047 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3048 } 3049 PetscFunctionReturn(0); 3050 } 3051 EXTERN_C_END 3052 3053 #undef __FUNCT__ 3054 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3055 /*@C 3056 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3057 (block compressed row). For good matrix assembly performance 3058 the user should preallocate the matrix storage by setting the parameters 3059 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3060 performance can be increased by more than a factor of 50. 3061 3062 Collective on Mat 3063 3064 Input Parameters: 3065 + A - the matrix 3066 . bs - size of block 3067 . d_nz - number of block nonzeros per block row in diagonal portion of local 3068 submatrix (same for all local rows) 3069 . d_nnz - array containing the number of block nonzeros in the various block rows 3070 in the diagonal portion of the local submatrix (possibly different for each block 3071 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3072 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3073 submatrix (same for all local rows). 3074 - o_nnz - array containing the number of nonzeros in the various block rows of the 3075 off-diagonal portion of the local submatrix (possibly different for 3076 each block row) or PETSC_NULL. 3077 3078 If the *_nnz parameter is given then the *_nz parameter is ignored 3079 3080 Options Database Keys: 3081 + -mat_block_size - size of the blocks to use 3082 - -mat_use_hash_table <fact> - use a hash table during assembly, with <fact> as the table-size factor 3083 3084 Notes: 3085 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3086 then it must be used on all processors that share the object for that argument. 
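   Example usage (a minimal sketch; the communicator comm, the choice of three
   local block rows, and the fill counts in d_nnz/o_nnz are hypothetical values
   chosen only for illustration):
.vb
   Mat      A;
   PetscInt bs = 2;                 /* 2x2 blocks */
   PetscInt d_nnz[] = {3,2,4};      /* block nonzeros per block row, diagonal part */
   PetscInt o_nnz[] = {1,0,2};      /* block nonzeros per block row, off-diagonal part */
   MatCreate(comm,&A);
   MatSetSizes(A,3*bs,3*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve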
3087 3088 Storage Information: 3089 For a square global matrix we define each processor's diagonal portion 3090 to be its local rows and the corresponding columns (a square submatrix); 3091 each processor's off-diagonal portion encompasses the remainder of the 3092 local matrix (a rectangular submatrix). 3093 3094 The user can specify preallocated storage for the diagonal part of 3095 the local submatrix with either d_nz or d_nnz (not both). Set 3096 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3097 memory allocation. Likewise, specify preallocated storage for the 3098 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3099 3100 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3101 the figure below we depict these three local rows and all columns (0-11). 3102 3103 .vb 3104 0 1 2 3 4 5 6 7 8 9 10 11 3105 ------------------- 3106 row 3 | o o o d d d o o o o o o 3107 row 4 | o o o d d d o o o o o o 3108 row 5 | o o o d d d o o o o o o 3109 ------------------- 3110 .ve 3111 3112 Thus, any entries in the d locations are stored in the d (diagonal) 3113 submatrix, and any entries in the o locations are stored in the 3114 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3115 stored simply in the MATSEQBAIJ format for compressed row storage. 3116 3117 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3118 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3119 In general, for PDE problems in which most nonzeros are near the diagonal, 3120 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3121 or you will get TERRIBLE performance; see the users' manual chapter on 3122 matrices. 3123 3124 You can call MatGetInfo() to get information on how effective the preallocation was; 3125 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3126 You can also run with the option -info and look for messages with the string 3127 malloc in them to see if additional memory allocation was needed. 3128 3129 Level: intermediate 3130 3131 .keywords: matrix, block, aij, compressed row, sparse, parallel 3132 3133 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3134 @*/ 3135 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3136 { 3137 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3138 3139 PetscFunctionBegin; 3140 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3141 if (f) { 3142 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3143 } 3144 PetscFunctionReturn(0); 3145 } 3146 3147 #undef __FUNCT__ 3148 #define __FUNCT__ "MatCreateMPIBAIJ" 3149 /*@C 3150 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3151 (block compressed row). For good matrix assembly performance 3152 the user should preallocate the matrix storage by setting the parameters 3153 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3154 performance can be increased by more than a factor of 50. 3155 3156 Collective on MPI_Comm 3157 3158 Input Parameters: 3159 + comm - MPI communicator 3160 . bs - size of block 3161 . 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly, with size factor <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o  o  o
   row 4  | o o o d d d o o o o  o  o
   row 5  | o o o d d d o o o o  o  o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
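/*
   A minimal usage sketch (M is a placeholder global size): create a
   distributed matrix with 2x2 blocks, letting PETSc split the rows and
   control the preallocation dynamically,

.vb
      Mat A;
      ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                              PETSC_DEFAULT,PETSC_NULL,PETSC_DEFAULT,PETSC_NULL,&A);CHKERRQ(ierr);
.ve

   As the text above recommends, new code should normally prefer the
   MatCreate()/MatSetType()/MatMPIBAIJSetPreallocation() sequence instead.
*/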
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
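/*
   A usage sketch: the routine above is reached through the generic interface,

.vb
      Mat B;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
.ve

   which copies the layout, the diagonal and off-diagonal SeqBAIJ parts, and
   (with MAT_COPY_VALUES) the numerical values of the MPIBAIJ matrix A.
*/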
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
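  /*
     Each process gets Mbs/size block rows, and the first Mbs % size ranks
     get one extra; e.g. with Mbs = 10 on size = 4 processes the block-row
     counts are 3, 3, 2 and 2.
  */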
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs*bs;
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the column indices for every other process (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the part for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values for every other process (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      nz   = procsnz[i] - extra_rows;  /* i == size-1 on exit from the loop above */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - the factor used in computing the hash table size

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
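/*
   A minimal usage sketch (the variable names are placeholders): access the
   per-process pieces of an assembled MPIBAIJ matrix A,

.vb
      Mat      Ad,Ao;
      PetscInt *colmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
.ve

   where Ad is the diagonal SeqBAIJ block, Ao the off-diagonal SeqBAIJ block,
   and colmap maps Ao's local block columns to global block column numbers.
*/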
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat    - the matrix
. min    - number of input rows
. im     - input rows
. nin    - number of input columns
. in     - input columns
. v      - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.

  Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
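/*
   For reference, a sketch of the standard, portable path that this direct
   Fortran binding short-circuits:

.vb
      ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
.ve

   from C this dispatches through the operations table to
   MatSetValuesBlocked_MPIBAIJ(), at the cost of the two extra function calls
   noted in the comment introducing this section.
*/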