#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
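/*
   Worked example (illustrative): with bs = 2, if this process's off-diagonal part B
   holds the global block columns garray = {3,7}, then colmap[3] = 1 and colmap[7] = 3,
   i.e. one plus the first local point column of that block in B (the +1 lets 0 mean
   "block not present").  A later lookup for global point column 15 probes
   colmap[15/bs] = colmap[7] = 3 and obtains local point column (3-1) + 15%2 = 3 in B.
*/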
#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
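/*
   Both macros above locate (or insert) a single point entry (row,col), given in local
   point indices, inside block-compressed storage.  Worked example (illustrative): with
   bs = 3, the point entry (row,col) = (7,5) lives in block row brow = 7/3 = 2, block
   column bcol = 5/3 = 1, at offsets ridx = 7%3 = 1 and cidx = 5%3 = 2 inside the bs*bs
   block, i.e. at ap[bs2*_i + bs*cidx + ridx] = ap[9*_i + 7] once the search over the
   block row finds the slot _i holding bcol.
*/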
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
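/*
   Rows owned by this process are written directly into A (diagonal block) or B
   (off-diagonal block); rows owned by another process are cached in mat->stash and
   shipped to their owners during assembly.  A typical caller-side sketch (global point
   indices, any process may set any row unless MAT_IGNORE_OFF_PROC_ENTRIES is set):

       PetscInt    row = 10, col = 13;
       PetscScalar val = 2.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/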
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
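/*
   Multiplicative hash: the key is scaled by the golden-ratio constant and the fractional
   part of the product selects a slot in a table of the given size.  Worked example
   (illustrative): with size = 1000 and key = 306, tmp = 306*0.6180339887 = 189.118...;
   the fractional part 0.118... gives slot (PetscInt)(1000*0.118...) = 118.  Collisions
   are resolved by the linear-probing loops in the routines below.
*/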
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
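/*
   Note: the hash-table insertion paths (this routine and the blocked variant below) can
   only write into block locations that were present when the table was built (see
   MatCreateHashTable_MPIBAIJ_Private() further down); a block (row/bs, col/bs) that is
   absent from the table is reported as an error rather than allocated, so the nonzero
   pattern must be complete before the table is enabled.
*/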
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap->N;
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD = baij->hd;
  HT = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
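/*
   MatAssemblyBegin() above only initiates the communication of the stashed off-process
   entries (point stash and block stash); the messages are received and applied in
   MatAssemblyEnd() below.  A typical caller-side sketch:

       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       (independent work may overlap the stash communication here)
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/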
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column oriented,
       clear the row-oriented flags, and restore the original flags after the
       MatSetValuesBlocked() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
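/*
   If MAT_USE_HASH_TABLE was requested (ht_flag), the first final assembly above builds
   the hash table and redirects subsequent MatSetValues()/MatSetValuesBlocked() calls to
   the *_HT variants.  A typical caller-side sketch:

       ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
       (set values and assemble once; later value updates that reuse the same
        nonzero pattern then go through the hash table)
*/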
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
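/*
   Both products above overlap communication with computation: the scatter of the needed
   off-process entries of xx into the ghost vector a->lvec is started, the purely local
   product with the diagonal block a->A is performed, the scatter is completed, and the
   off-diagonal block a->B then multiplies the ghost values and adds into the result.
*/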
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
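/*
   Pattern-related options above are forwarded to both the diagonal (A) and off-diagonal
   (B) blocks; MAT_IGNORE_OFF_PROC_ENTRIES and MAT_USE_HASH_TABLE only flip local flags.
   A typical caller-side sketch:

       ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
       (subsequent MatSetValues() calls for rows owned by other processes are then
        silently dropped instead of being stashed and communicated)
*/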
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
(i=0; i<size; i++) { 1627 if (nprocs[2*i+1]) { 1628 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1629 } 1630 } 1631 ierr = PetscFree(starts);CHKERRQ(ierr); 1632 1633 base = owners[rank]; 1634 1635 /* wait on receives */ 1636 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1637 source = lens + nrecvs; 1638 count = nrecvs; slen = 0; 1639 while (count) { 1640 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1641 /* unpack receives into our local space */ 1642 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1643 source[imdex] = recv_status.MPI_SOURCE; 1644 lens[imdex] = n; 1645 slen += n; 1646 count--; 1647 } 1648 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1649 1650 /* move the data into the send scatter */ 1651 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1652 count = 0; 1653 for (i=0; i<nrecvs; i++) { 1654 values = rvalues + i*nmax; 1655 for (j=0; j<lens[i]; j++) { 1656 lrows[count++] = values[j] - base; 1657 } 1658 } 1659 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1660 ierr = PetscFree(lens);CHKERRQ(ierr); 1661 ierr = PetscFree(owner);CHKERRQ(ierr); 1662 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1663 1664 /* actually zap the local rows */ 1665 /* 1666 Zero the required rows. If the "diagonal block" of the matrix 1667 is square and the user wishes to set the diagonal we use separate 1668 code so that MatSetValues() is not called for each diagonal allocating 1669 new memory, thus calling lots of mallocs and slowing things down. 1670 1671 Contributed by: Matthew Knepley 1672 */ 1673 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1674 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1675 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1676 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1677 } else if (diag != 0.0) { 1678 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1679 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1680 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1681 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1682 } 1683 for (i=0; i<slen; i++) { 1684 row = lrows[i] + rstart_bs; 1685 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1686 } 1687 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 } else { 1690 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1691 } 1692 1693 ierr = PetscFree(lrows);CHKERRQ(ierr); 1694 1695 /* wait on sends */ 1696 if (nsends) { 1697 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1698 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1699 ierr = PetscFree(send_status);CHKERRQ(ierr); 1700 } 1701 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1702 ierr = PetscFree(svalues);CHKERRQ(ierr); 1703 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1709 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1710 { 1711 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ 
"MatEqual_MPIBAIJ" 1723 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1724 { 1725 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1726 Mat a,b,c,d; 1727 PetscTruth flg; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 a = matA->A; b = matA->B; 1732 c = matB->A; d = matB->B; 1733 1734 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1735 if (flg) { 1736 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1737 } 1738 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatCopy_MPIBAIJ" 1744 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1745 { 1746 PetscErrorCode ierr; 1747 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1748 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1749 1750 PetscFunctionBegin; 1751 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1752 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1753 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1754 } else { 1755 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1756 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1757 } 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1763 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1764 { 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #include "petscblaslapack.h" 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1775 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1776 { 1777 PetscErrorCode ierr; 1778 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1779 PetscBLASInt bnz,one=1; 1780 Mat_SeqBAIJ *x,*y; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 x = (Mat_SeqBAIJ *)xx->A->data; 1786 y = (Mat_SeqBAIJ *)yy->A->data; 1787 bnz = PetscBLASIntCast(x->nz); 1788 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1789 x = (Mat_SeqBAIJ *)xx->B->data; 1790 y = (Mat_SeqBAIJ *)yy->B->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 } else { 1794 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1795 } 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1801 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1802 { 1803 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1804 PetscErrorCode ierr; 1805 1806 PetscFunctionBegin; 1807 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1808 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1814 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1815 { 1816 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1821 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1827 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1828 { 1829 PetscErrorCode ierr; 1830 IS iscol_local; 1831 PetscInt csize; 1832 1833 PetscFunctionBegin; 1834 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1835 if (call == 
MAT_REUSE_MATRIX) { 1836 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1837 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1838 } else { 1839 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1840 } 1841 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1842 if (call == MAT_INITIAL_MATRIX) { 1843 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1844 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1845 } 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1851 /* 1852 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1853 in local and then by concatenating the local matrices the end result. 1854 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1855 */ 1856 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1857 { 1858 PetscErrorCode ierr; 1859 PetscMPIInt rank,size; 1860 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1861 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1862 Mat *local,M,Mreuse; 1863 MatScalar *vwork,*aa; 1864 MPI_Comm comm = ((PetscObject)mat)->comm; 1865 Mat_SeqBAIJ *aij; 1866 1867 1868 PetscFunctionBegin; 1869 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1870 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1871 1872 if (call == MAT_REUSE_MATRIX) { 1873 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1874 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1875 local = &Mreuse; 1876 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1877 } else { 1878 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1879 Mreuse = *local; 1880 ierr = PetscFree(local);CHKERRQ(ierr); 1881 } 1882 1883 /* 1884 m - number of local rows 1885 n - number of columns (same on all processors) 1886 rstart - first row in new global matrix generated 1887 */ 1888 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1889 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1890 m = m/bs; 1891 n = n/bs; 1892 1893 if (call == MAT_INITIAL_MATRIX) { 1894 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1895 ii = aij->i; 1896 jj = aij->j; 1897 1898 /* 1899 Determine the number of non-zeros in the diagonal and off-diagonal 1900 portions of the matrix in order to do correct preallocation 1901 */ 1902 1903 /* first get start and end of "diagonal" columns */ 1904 if (csize == PETSC_DECIDE) { 1905 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1906 if (mglobal == n*bs) { /* square matrix */ 1907 nlocal = m; 1908 } else { 1909 nlocal = n/size + ((n % size) > rank); 1910 } 1911 } else { 1912 nlocal = csize/bs; 1913 } 1914 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1915 rstart = rend - nlocal; 1916 if (rank == size - 1 && rend != n) { 1917 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1918 } 1919 1920 /* next, compute all the lengths */ 1921 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1922 olens = dlens + m; 1923 for (i=0; i<m; i++) { 1924 jend = ii[i+1] - ii[i]; 1925 olen = 0; 1926 dlen = 0; 1927 for (j=0; j<jend; j++) { 1928 if (*jj < rstart || *jj >= rend) olen++; 1929 else 
dlen++; 1930 jj++; 1931 } 1932 olens[i] = olen; 1933 dlens[i] = dlen; 1934 } 1935 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1936 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1937 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1938 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1939 ierr = PetscFree(dlens);CHKERRQ(ierr); 1940 } else { 1941 PetscInt ml,nl; 1942 1943 M = *newmat; 1944 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1945 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1946 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1947 /* 1948 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1949 rather than the slower MatSetValues(). 1950 */ 1951 M->was_assembled = PETSC_TRUE; 1952 M->assembled = PETSC_FALSE; 1953 } 1954 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1955 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1956 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1957 ii = aij->i; 1958 jj = aij->j; 1959 aa = aij->a; 1960 for (i=0; i<m; i++) { 1961 row = rstart/bs + i; 1962 nz = ii[i+1] - ii[i]; 1963 cwork = jj; jj += nz; 1964 vwork = aa; aa += nz; 1965 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1966 } 1967 1968 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1969 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1970 *newmat = M; 1971 1972 /* save submatrix used in processor for next request */ 1973 if (call == MAT_INITIAL_MATRIX) { 1974 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1975 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1976 } 1977 1978 PetscFunctionReturn(0); 1979 } 1980 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatPermute_MPIBAIJ" 1983 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1984 { 1985 MPI_Comm comm,pcomm; 1986 PetscInt first,local_size,nrows; 1987 const PetscInt *rows; 1988 PetscMPIInt size; 1989 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1990 PetscErrorCode ierr; 1991 1992 PetscFunctionBegin; 1993 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1994 /* make a collective version of 'rowp' */ 1995 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1996 if (pcomm==comm) { 1997 crowp = rowp; 1998 } else { 1999 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2000 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2001 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2002 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2003 } 2004 /* collect the global row permutation and invert it */ 2005 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2006 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2007 if (pcomm!=comm) { 2008 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2009 } 2010 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2011 /* get the local target indices */ 2012 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2013 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2014 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2015 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2016 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2017 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2018 /* the column permutation is so much easier; 2019 make a local version of 'colp' and invert it */ 2020 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 
2021 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2022 if (size==1) { 2023 lcolp = colp; 2024 } else { 2025 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2026 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2027 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2028 } 2029 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2030 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2031 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2032 if (size>1) { 2033 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2034 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2035 } 2036 /* now we just get the submatrix */ 2037 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2038 /* clean up */ 2039 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2040 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2046 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2047 { 2048 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2049 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2050 2051 PetscFunctionBegin; 2052 if (nghosts) { *nghosts = B->nbs;} 2053 if (ghosts) {*ghosts = baij->garray;} 2054 PetscFunctionReturn(0); 2055 } 2056 2057 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2061 /* 2062 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2063 */ 2064 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2065 { 2066 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2067 PetscErrorCode ierr; 2068 PetscMPIInt size,*ncolsonproc,*disp,nn; 2069 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2070 const PetscInt *is; 2071 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2072 PetscInt *rowhit,M,cstart,cend,colb; 2073 PetscInt *columnsforrow,l; 2074 IS *isa; 2075 PetscTruth done,flg; 2076 ISLocalToGlobalMapping map = mat->bmapping; 2077 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2078 2079 PetscFunctionBegin; 2080 if (!mat->assembled) { 2081 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2082 } 2083 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2084 2085 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2086 2087 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2088 M = mat->rmap->n/bs; 2089 cstart = mat->cmap->rstart/bs; 2090 cend = mat->cmap->rend/bs; 2091 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2092 c->N = mat->cmap->N/bs; 2093 c->m = mat->rmap->n/bs; 2094 c->rstart = mat->rmap->rstart/bs; 2095 2096 c->ncolors = nis; 2097 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2098 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2099 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2100 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2101 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2102 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2103 2104 /* Allow access to data structures of local part of matrix */ 2105 if (!baij->colmap) { 2106 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2107 } 2108 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2109 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2110 2111 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2112 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2113 2114 for (i=0; i<nis; i++) { 2115 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2116 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2117 c->ncolumns[i] = n; 2118 if (n) { 2119 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2120 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2121 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2122 } else { 2123 c->columns[i] = 0; 2124 } 2125 2126 if (ctype == IS_COLORING_GLOBAL){ 2127 /* Determine the total (parallel) number of columns of this color */ 2128 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2129 ierr = PetscMalloc(2*size*sizeof(PetscInt*),&ncolsonproc);CHKERRQ(ierr); 2130 disp = ncolsonproc + size; 2131 2132 nn = PetscMPIIntCast(n); 2133 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2134 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2135 if (!nctot) { 2136 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2137 } 2138 2139 disp[0] = 0; 2140 for (j=1; j<size; j++) { 2141 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2142 } 2143 2144 /* Get complete list of columns for color on each processor */ 2145 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2146 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2147 ierr = PetscFree(ncolsonproc);CHKERRQ(ierr); 2148 } else if (ctype == IS_COLORING_GHOSTED){ 2149 /* Determine local number of columns of this color on this process, including ghost points */ 2150 nctot = n; 2151 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2152 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2153 } else { 2154 SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2155 } 2156 2157 /* 2158 Mark all rows affect by these columns 2159 */ 2160 /* Temporary option to allow for debugging/testing */ 2161 flg = PETSC_FALSE; 2162 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2163 if (!flg) {/*-----------------------------------------------------------------------------*/ 2164 /* crude, fast version */ 2165 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2166 /* loop over columns*/ 2167 for (j=0; j<nctot; j++) { 2168 if (ctype == IS_COLORING_GHOSTED) { 2169 col = ltog[cols[j]]; 2170 } else { 2171 col = cols[j]; 2172 } 2173 if (col >= cstart && col < cend) { 2174 /* column is in diagonal block of matrix */ 2175 rows = A_cj + A_ci[col-cstart]; 2176 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2177 } else { 2178 #if defined (PETSC_USE_CTABLE) 2179 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr) 2180 colb --; 2181 #else 2182 colb = baij->colmap[col] - 1; 2183 #endif 2184 if (colb == -1) { 2185 m = 0; 2186 } else { 2187 colb = colb/bs; 2188 rows = B_cj + B_ci[colb]; 2189 m = B_ci[colb+1] - B_ci[colb]; 2190 } 2191 } 2192 /* loop over columns marking them in rowhit */ 2193 for (k=0; k<m; k++) { 2194 rowhit[*rows++] = col + 1; 2195 } 2196 } 2197 2198 /* count the number of hits */ 2199 nrows = 0; 2200 for (j=0; j<M; j++) { 2201 if (rowhit[j]) nrows++; 2202 } 2203 c->nrows[i] = nrows; 2204 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2205 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2206 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2207 nrows = 0; 2208 for (j=0; j<M; j++) { 2209 if (rowhit[j]) { 2210 c->rows[i][nrows] = j; 2211 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2212 nrows++; 2213 } 2214 } 2215 } else {/*-------------------------------------------------------------------------------*/ 2216 /* slow version, using rowhit as a linked list */ 2217 PetscInt currentcol,fm,mfm; 2218 rowhit[M] = M; 2219 nrows = 0; 2220 /* loop over columns*/ 2221 for (j=0; j<nctot; j++) { 2222 if (ctype == IS_COLORING_GHOSTED) { 2223 col = ltog[cols[j]]; 2224 } else { 2225 col = cols[j]; 2226 } 2227 if (col >= cstart && col < cend) { 2228 /* column is in diagonal block of matrix */ 2229 rows = A_cj + A_ci[col-cstart]; 2230 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2231 } else { 2232 #if defined (PETSC_USE_CTABLE) 2233 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2234 colb --; 2235 #else 2236 colb = baij->colmap[col] - 1; 2237 #endif 2238 if (colb == -1) { 2239 m = 0; 2240 } else { 2241 colb = colb/bs; 2242 rows = B_cj + B_ci[colb]; 2243 m = B_ci[colb+1] - B_ci[colb]; 2244 } 2245 } 2246 2247 /* loop over columns marking them in rowhit */ 2248 fm = M; /* fm points to first entry in linked list */ 2249 for (k=0; k<m; k++) { 2250 currentcol = *rows++; 2251 /* is it already in the list? 
*/ 2252 do { 2253 mfm = fm; 2254 fm = rowhit[fm]; 2255 } while (fm < currentcol); 2256 /* not in list so add it */ 2257 if (fm != currentcol) { 2258 nrows++; 2259 columnsforrow[currentcol] = col; 2260 /* next three lines insert new entry into linked list */ 2261 rowhit[mfm] = currentcol; 2262 rowhit[currentcol] = fm; 2263 fm = currentcol; 2264 /* fm points to present position in list since we know the columns are sorted */ 2265 } else { 2266 SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2267 } 2268 } 2269 } 2270 c->nrows[i] = nrows; 2271 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2272 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2273 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2274 /* now store the linked list of rows into c->rows[i] */ 2275 nrows = 0; 2276 fm = rowhit[M]; 2277 do { 2278 c->rows[i][nrows] = fm; 2279 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2280 fm = rowhit[fm]; 2281 } while (fm < M); 2282 } /* ---------------------------------------------------------------------------------------*/ 2283 ierr = PetscFree(cols);CHKERRQ(ierr); 2284 } 2285 2286 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2287 /* 2288 vscale will contain the "diagonal" on processor scalings followed by the off processor 2289 */ 2290 if (ctype == IS_COLORING_GLOBAL) { 2291 PetscInt *garray; 2292 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2293 for (i=0; i<baij->B->cmap->n/bs; i++) { 2294 for (j=0; j<bs; j++) { 2295 garray[i*bs+j] = bs*baij->garray[i]+j; 2296 } 2297 } 2298 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2299 ierr = PetscFree(garray);CHKERRQ(ierr); 2300 CHKMEMQ; 2301 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2302 for (k=0; k<c->ncolors; k++) { 2303 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2304 for (l=0; l<c->nrows[k]; l++) { 2305 col = c->columnsforrow[k][l]; 2306 if (col >= cstart && col < cend) { 2307 /* column is in diagonal block of matrix */ 2308 colb = col - cstart; 2309 } else { 2310 /* column is in "off-processor" part */ 2311 #if defined (PETSC_USE_CTABLE) 2312 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2313 colb --; 2314 #else 2315 colb = baij->colmap[col] - 1; 2316 #endif 2317 colb = colb/bs; 2318 colb += cend - cstart; 2319 } 2320 c->vscaleforrow[k][l] = colb; 2321 } 2322 } 2323 } else if (ctype == IS_COLORING_GHOSTED) { 2324 /* Get gtol mapping */ 2325 PetscInt N = mat->cmap->N, *gtol; 2326 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2327 for (i=0; i<N; i++) gtol[i] = -1; 2328 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2329 2330 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2331 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2332 for (k=0; k<c->ncolors; k++) { 2333 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2334 for (l=0; l<c->nrows[k]; l++) { 2335 col = c->columnsforrow[k][l]; /* global column index */ 2336 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2337 } 2338 } 2339 ierr = PetscFree(gtol);CHKERRQ(ierr); 2340 } 2341 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2342 2343 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2344 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2345 ierr = 
MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2346 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2347 CHKMEMQ; 2348 PetscFunctionReturn(0); 2349 } 2350 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2353 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2354 { 2355 Mat B; 2356 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2357 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2358 Mat_SeqAIJ *b; 2359 PetscErrorCode ierr; 2360 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2361 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2362 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2363 2364 PetscFunctionBegin; 2365 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2366 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2367 2368 /* ---------------------------------------------------------------- 2369 Tell every processor the number of nonzeros per row 2370 */ 2371 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2372 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2373 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2374 } 2375 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2376 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2377 displs = recvcounts + size; 2378 for (i=0; i<size; i++) { 2379 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2380 displs[i] = A->rmap->range[i]/bs; 2381 } 2382 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2383 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2384 #else 2385 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2386 #endif 2387 /* --------------------------------------------------------------- 2388 Create the sequential matrix of the same type as the local block diagonal 2389 */ 2390 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2391 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2392 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2393 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2394 b = (Mat_SeqAIJ *)B->data; 2395 2396 /*-------------------------------------------------------------------- 2397 Copy my part of matrix column indices over 2398 */ 2399 sendcount = ad->nz + bd->nz; 2400 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2401 a_jsendbuf = ad->j; 2402 b_jsendbuf = bd->j; 2403 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2404 cnt = 0; 2405 for (i=0; i<n; i++) { 2406 2407 /* put in lower diagonal portion */ 2408 m = bd->i[i+1] - bd->i[i]; 2409 while (m > 0) { 2410 /* is it above diagonal (in bd (compressed) numbering) */ 2411 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2412 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2413 m--; 2414 } 2415 2416 /* put in diagonal portion */ 2417 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2418 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2419 } 2420 2421 /* put in upper diagonal portion */ 2422 while (m-- > 0) { 2423 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2424 } 2425 } 2426 if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz 
%D",sendcount,cnt); 2427 2428 /*-------------------------------------------------------------------- 2429 Gather all column indices to all processors 2430 */ 2431 for (i=0; i<size; i++) { 2432 recvcounts[i] = 0; 2433 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2434 recvcounts[i] += lens[j]; 2435 } 2436 } 2437 displs[0] = 0; 2438 for (i=1; i<size; i++) { 2439 displs[i] = displs[i-1] + recvcounts[i-1]; 2440 } 2441 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2442 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2443 #else 2444 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2445 #endif 2446 /*-------------------------------------------------------------------- 2447 Assemble the matrix into useable form (note numerical values not yet set) 2448 */ 2449 /* set the b->ilen (length of each row) values */ 2450 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2451 /* set the b->i indices */ 2452 b->i[0] = 0; 2453 for (i=1; i<=A->rmap->N/bs; i++) { 2454 b->i[i] = b->i[i-1] + lens[i-1]; 2455 } 2456 ierr = PetscFree(lens);CHKERRQ(ierr); 2457 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2458 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2459 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2460 2461 if (A->symmetric){ 2462 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2463 } else if (A->hermitian) { 2464 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2465 } else if (A->structurally_symmetric) { 2466 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2467 } 2468 *newmat = B; 2469 PetscFunctionReturn(0); 2470 } 2471 2472 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2473 2474 2475 /* -------------------------------------------------------------------*/ 2476 static struct _MatOps MatOps_Values = { 2477 MatSetValues_MPIBAIJ, 2478 MatGetRow_MPIBAIJ, 2479 MatRestoreRow_MPIBAIJ, 2480 MatMult_MPIBAIJ, 2481 /* 4*/ MatMultAdd_MPIBAIJ, 2482 MatMultTranspose_MPIBAIJ, 2483 MatMultTransposeAdd_MPIBAIJ, 2484 0, 2485 0, 2486 0, 2487 /*10*/ 0, 2488 0, 2489 0, 2490 0, 2491 MatTranspose_MPIBAIJ, 2492 /*15*/ MatGetInfo_MPIBAIJ, 2493 MatEqual_MPIBAIJ, 2494 MatGetDiagonal_MPIBAIJ, 2495 MatDiagonalScale_MPIBAIJ, 2496 MatNorm_MPIBAIJ, 2497 /*20*/ MatAssemblyBegin_MPIBAIJ, 2498 MatAssemblyEnd_MPIBAIJ, 2499 MatSetOption_MPIBAIJ, 2500 MatZeroEntries_MPIBAIJ, 2501 /*24*/ MatZeroRows_MPIBAIJ, 2502 0, 2503 0, 2504 0, 2505 0, 2506 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2507 0, 2508 0, 2509 0, 2510 0, 2511 /*34*/ MatDuplicate_MPIBAIJ, 2512 0, 2513 0, 2514 0, 2515 0, 2516 /*39*/ MatAXPY_MPIBAIJ, 2517 MatGetSubMatrices_MPIBAIJ, 2518 MatIncreaseOverlap_MPIBAIJ, 2519 MatGetValues_MPIBAIJ, 2520 MatCopy_MPIBAIJ, 2521 /*44*/ 0, 2522 MatScale_MPIBAIJ, 2523 0, 2524 0, 2525 0, 2526 /*49*/ 0, 2527 0, 2528 0, 2529 0, 2530 0, 2531 /*54*/ MatFDColoringCreate_MPIBAIJ, 2532 0, 2533 MatSetUnfactored_MPIBAIJ, 2534 MatPermute_MPIBAIJ, 2535 MatSetValuesBlocked_MPIBAIJ, 2536 /*59*/ MatGetSubMatrix_MPIBAIJ, 2537 MatDestroy_MPIBAIJ, 2538 MatView_MPIBAIJ, 2539 0, 2540 0, 2541 /*64*/ 0, 2542 0, 2543 0, 2544 0, 2545 0, 2546 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2547 0, 2548 0, 2549 0, 2550 0, 2551 /*74*/ 0, 2552 MatFDColoringApply_BAIJ, 2553 0, 2554 0, 2555 0, 2556 /*79*/ 0, 2557 0, 2558 0, 2559 0, 2560 MatLoad_MPIBAIJ, 2561 /*84*/ 0, 
2562 0, 2563 0, 2564 0, 2565 0, 2566 /*89*/ 0, 2567 0, 2568 0, 2569 0, 2570 0, 2571 /*94*/ 0, 2572 0, 2573 0, 2574 0, 2575 0, 2576 /*99*/ 0, 2577 0, 2578 0, 2579 0, 2580 0, 2581 /*104*/0, 2582 MatRealPart_MPIBAIJ, 2583 MatImaginaryPart_MPIBAIJ, 2584 0, 2585 0, 2586 /*109*/0, 2587 0, 2588 0, 2589 0, 2590 0, 2591 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2592 0, 2593 MatGetGhosts_MPIBAIJ 2594 }; 2595 2596 EXTERN_C_BEGIN 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2599 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2600 { 2601 PetscFunctionBegin; 2602 *a = ((Mat_MPIBAIJ *)A->data)->A; 2603 *iscopy = PETSC_FALSE; 2604 PetscFunctionReturn(0); 2605 } 2606 EXTERN_C_END 2607 2608 EXTERN_C_BEGIN 2609 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2610 EXTERN_C_END 2611 2612 EXTERN_C_BEGIN 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2615 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2616 { 2617 PetscInt m,rstart,cstart,cend; 2618 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2619 const PetscInt *JJ=0; 2620 PetscScalar *values=0; 2621 PetscErrorCode ierr; 2622 2623 PetscFunctionBegin; 2624 2625 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2626 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2627 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2628 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2629 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2630 m = B->rmap->n/bs; 2631 rstart = B->rmap->rstart/bs; 2632 cstart = B->cmap->rstart/bs; 2633 cend = B->cmap->rend/bs; 2634 2635 if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2636 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2637 o_nnz = d_nnz + m; 2638 for (i=0; i<m; i++) { 2639 nz = ii[i+1] - ii[i]; 2640 if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2641 nz_max = PetscMax(nz_max,nz); 2642 JJ = jj + ii[i]; 2643 for (j=0; j<nz; j++) { 2644 if (*JJ >= cstart) break; 2645 JJ++; 2646 } 2647 d = 0; 2648 for (; j<nz; j++) { 2649 if (*JJ++ >= cend) break; 2650 d++; 2651 } 2652 d_nnz[i] = d; 2653 o_nnz[i] = nz - d; 2654 } 2655 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2656 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2657 2658 values = (PetscScalar*)V; 2659 if (!values) { 2660 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2661 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2662 } 2663 for (i=0; i<m; i++) { 2664 PetscInt row = i + rstart; 2665 PetscInt ncols = ii[i+1] - ii[i]; 2666 const PetscInt *icols = jj + ii[i]; 2667 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0); 2668 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2669 } 2670 2671 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2672 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2673 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2674 2675 PetscFunctionReturn(0); 2676 } 2677 EXTERN_C_END 2678 2679 #undef __FUNCT__ 2680 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2681 /*@C 2682 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2683 (the default parallel PETSc format). 2684 2685 Collective on MPI_Comm 2686 2687 Input Parameters: 2688 + A - the matrix 2689 . i - the indices into j for the start of each local row (starts with zero) 2690 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2691 - v - optional values in the matrix 2692 2693 Level: developer 2694 2695 .keywords: matrix, aij, compressed row, sparse, parallel 2696 2697 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2698 @*/ 2699 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2700 { 2701 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2702 2703 PetscFunctionBegin; 2704 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2705 if (f) { 2706 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2707 } 2708 PetscFunctionReturn(0); 2709 } 2710 2711 EXTERN_C_BEGIN 2712 #undef __FUNCT__ 2713 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2714 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2715 { 2716 Mat_MPIBAIJ *b; 2717 PetscErrorCode ierr; 2718 PetscInt i, newbs = PetscAbs(bs); 2719 2720 PetscFunctionBegin; 2721 if (bs < 0) { 2722 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2723 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2724 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2725 bs = PetscAbs(bs); 2726 } 2727 if ((d_nnz || o_nnz) && newbs != bs) { 2728 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2729 } 2730 bs = newbs; 2731 2732 2733 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2734 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2735 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2736 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2737 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2738 2739 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2740 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2741 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2742 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2743 2744 if (d_nnz) { 2745 for (i=0; i<B->rmap->n/bs; i++) { 2746 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2747 } 2748 } 2749 if (o_nnz) { 2750 for (i=0; i<B->rmap->n/bs; i++) { 2751 if (o_nnz[i] < 0) 
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2752 } 2753 } 2754 2755 b = (Mat_MPIBAIJ*)B->data; 2756 b->bs2 = bs*bs; 2757 b->mbs = B->rmap->n/bs; 2758 b->nbs = B->cmap->n/bs; 2759 b->Mbs = B->rmap->N/bs; 2760 b->Nbs = B->cmap->N/bs; 2761 2762 for (i=0; i<=b->size; i++) { 2763 b->rangebs[i] = B->rmap->range[i]/bs; 2764 } 2765 b->rstartbs = B->rmap->rstart/bs; 2766 b->rendbs = B->rmap->rend/bs; 2767 b->cstartbs = B->cmap->rstart/bs; 2768 b->cendbs = B->cmap->rend/bs; 2769 2770 if (!B->preallocated) { 2771 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2772 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2773 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2774 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2775 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2776 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2777 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2778 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2779 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2780 } 2781 2782 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2783 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2784 B->preallocated = PETSC_TRUE; 2785 PetscFunctionReturn(0); 2786 } 2787 EXTERN_C_END 2788 2789 EXTERN_C_BEGIN 2790 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2791 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2792 EXTERN_C_END 2793 2794 2795 EXTERN_C_BEGIN 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2798 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2799 { 2800 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2801 PetscErrorCode ierr; 2802 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2803 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2804 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2805 2806 PetscFunctionBegin; 2807 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2808 ii[0] = 0; 2809 CHKMEMQ; 2810 for (i=0; i<M; i++) { 2811 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2812 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2813 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2814 /* remove one from count of matrix has diagonal */ 2815 for (j=id[i]; j<id[i+1]; j++) { 2816 if (jd[j] == i) {ii[i+1]--;break;} 2817 } 2818 CHKMEMQ; 2819 } 2820 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2821 cnt = 0; 2822 for (i=0; i<M; i++) { 2823 for (j=io[i]; j<io[i+1]; j++) { 2824 if (garray[jo[j]] > rstart) break; 2825 jj[cnt++] = garray[jo[j]]; 2826 CHKMEMQ; 2827 } 2828 for (k=id[i]; k<id[i+1]; k++) { 2829 if (jd[k] != i) { 2830 jj[cnt++] = rstart + jd[k]; 2831 CHKMEMQ; 2832 } 2833 } 2834 for (;j<io[i+1]; j++) { 2835 jj[cnt++] = garray[jo[j]]; 2836 CHKMEMQ; 2837 } 2838 } 2839 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2840 PetscFunctionReturn(0); 2841 } 2842 EXTERN_C_END 2843 2844 /*MC 2845 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 
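
   The type is normally selected either with -mat_type mpibaij at run time or
   programmatically, as in the following sketch (the matrix A and the communicator
   comm are assumed to be declared elsewhere):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
.ve
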
2846 2847 Options Database Keys: 2848 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2849 . -mat_block_size <bs> - set the blocksize used to store the matrix 2850 - -mat_use_hash_table <fact> 2851 2852 Level: beginner 2853 2854 .seealso: MatCreateMPIBAIJ 2855 M*/ 2856 2857 EXTERN_C_BEGIN 2858 #undef __FUNCT__ 2859 #define __FUNCT__ "MatCreate_MPIBAIJ" 2860 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2861 { 2862 Mat_MPIBAIJ *b; 2863 PetscErrorCode ierr; 2864 PetscTruth flg; 2865 2866 PetscFunctionBegin; 2867 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2868 B->data = (void*)b; 2869 2870 2871 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2872 B->mapping = 0; 2873 B->assembled = PETSC_FALSE; 2874 2875 B->insertmode = NOT_SET_VALUES; 2876 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2877 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2878 2879 /* build local table of row and column ownerships */ 2880 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2881 2882 /* build cache for off array entries formed */ 2883 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2884 b->donotstash = PETSC_FALSE; 2885 b->colmap = PETSC_NULL; 2886 b->garray = PETSC_NULL; 2887 b->roworiented = PETSC_TRUE; 2888 2889 /* stuff used in block assembly */ 2890 b->barray = 0; 2891 2892 /* stuff used for matrix vector multiply */ 2893 b->lvec = 0; 2894 b->Mvctx = 0; 2895 2896 /* stuff for MatGetRow() */ 2897 b->rowindices = 0; 2898 b->rowvalues = 0; 2899 b->getrowactive = PETSC_FALSE; 2900 2901 /* hash table stuff */ 2902 b->ht = 0; 2903 b->hd = 0; 2904 b->ht_size = 0; 2905 b->ht_flag = PETSC_FALSE; 2906 b->ht_fact = 0; 2907 b->ht_total_ct = 0; 2908 b->ht_insert_ct = 0; 2909 2910 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2911 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2912 if (flg) { 2913 PetscReal fact = 1.39; 2914 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2915 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2916 if (fact <= 1.0) fact = 1.39; 2917 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2918 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2919 } 2920 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2921 2922 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 2923 "MatConvert_MPIBAIJ_MPIAdj", 2924 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2925 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2926 "MatStoreValues_MPIBAIJ", 2927 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2928 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2929 "MatRetrieveValues_MPIBAIJ", 2930 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2931 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2932 "MatGetDiagonalBlock_MPIBAIJ", 2933 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2934 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2935 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2936 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2937 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2938 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2939 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2940 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2941 "MatDiagonalScaleLocal_MPIBAIJ", 2942 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2943 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2944 "MatSetHashTableFactor_MPIBAIJ", 2945 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2946 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2947 PetscFunctionReturn(0); 2948 } 2949 EXTERN_C_END 2950 2951 /*MC 2952 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2953 2954 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2955 and MATMPIBAIJ otherwise. 2956 2957 Options Database Keys: 2958 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2959 2960 Level: beginner 2961 2962 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2963 M*/ 2964 2965 EXTERN_C_BEGIN 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "MatCreate_BAIJ" 2968 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2969 { 2970 PetscErrorCode ierr; 2971 PetscMPIInt size; 2972 2973 PetscFunctionBegin; 2974 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2975 if (size == 1) { 2976 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2977 } else { 2978 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2979 } 2980 PetscFunctionReturn(0); 2981 } 2982 EXTERN_C_END 2983 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2986 /*@C 2987 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2988 (block compressed row). For good matrix assembly performance 2989 the user should preallocate the matrix storage by setting the parameters 2990 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2991 performance can be increased by more than a factor of 50. 2992 2993 Collective on Mat 2994 2995 Input Parameters: 2996 + A - the matrix 2997 . bs - size of blockk 2998 . d_nz - number of block nonzeros per block row in diagonal portion of local 2999 submatrix (same for all local rows) 3000 . d_nnz - array containing the number of block nonzeros in the various block rows 3001 of the in diagonal portion of the local (possibly different for each block 3002 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3003 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3004 submatrix (same for all local rows). 3005 - o_nnz - array containing the number of nonzeros in the various block rows of the 3006 off-diagonal portion of the local submatrix (possibly different for 3007 each block row) or PETSC_NULL. 3008 3009 If the *_nnz parameter is given then the *_nz parameter is ignored 3010 3011 Options Database Keys: 3012 + -mat_block_size - size of the blocks to use 3013 - -mat_use_hash_table <fact> 3014 3015 Notes: 3016 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3017 than it must be used on all processors that share the object for that argument. 
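
   A minimal usage sketch follows; the communicator comm and the error code ierr are
   assumed to be declared elsewhere, and the block size of 3, the 30 local rows per
   process, and the preallocation counts are illustrative values only, not
   recommendations:

.vb
   Mat B;
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,30,30,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(B,3,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
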
3018 3019 Storage Information: 3020 For a square global matrix we define each processor's diagonal portion 3021 to be its local rows and the corresponding columns (a square submatrix); 3022 each processor's off-diagonal portion encompasses the remainder of the 3023 local matrix (a rectangular submatrix). 3024 3025 The user can specify preallocated storage for the diagonal part of 3026 the local submatrix with either d_nz or d_nnz (not both). Set 3027 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3028 memory allocation. Likewise, specify preallocated storage for the 3029 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3030 3031 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3032 the figure below we depict these three local rows and all columns (0-11). 3033 3034 .vb 3035 0 1 2 3 4 5 6 7 8 9 10 11 3036 ------------------- 3037 row 3 | o o o d d d o o o o o o 3038 row 4 | o o o d d d o o o o o o 3039 row 5 | o o o d d d o o o o o o 3040 ------------------- 3041 .ve 3042 3043 Thus, any entries in the d locations are stored in the d (diagonal) 3044 submatrix, and any entries in the o locations are stored in the 3045 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3046 stored simply in the MATSEQBAIJ format for compressed row storage. 3047 3048 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3049 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3050 In general, for PDE problems in which most nonzeros are near the diagonal, 3051 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3052 or you will get TERRIBLE performance; see the users' manual chapter on 3053 matrices. 3054 3055 You can call MatGetInfo() to get information on how effective the preallocation was; 3056 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3057 You can also run with the option -info and look for messages with the string 3058 malloc in them to see if additional memory allocation was needed. 3059 3060 Level: intermediate 3061 3062 .keywords: matrix, block, aij, compressed row, sparse, parallel 3063 3064 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3065 @*/ 3066 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3067 { 3068 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3069 3070 PetscFunctionBegin; 3071 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3072 if (f) { 3073 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3074 } 3075 PetscFunctionReturn(0); 3076 } 3077 3078 #undef __FUNCT__ 3079 #define __FUNCT__ "MatCreateMPIBAIJ" 3080 /*@C 3081 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3082 (block compressed row). For good matrix assembly performance 3083 the user should preallocate the matrix storage by setting the parameters 3084 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3085 performance can be increased by more than a factor of 50. 3086 3087 Collective on MPI_Comm 3088 3089 Input Parameters: 3090 + comm - MPI communicator 3091 . bs - size of blockk 3092 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3093 This value should be the same as the local size used in creating the 3094 y vector for the matrix-vector product y = Ax. 3095 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3096 This value should be the same as the local size used in creating the 3097 x vector for the matrix-vector product y = Ax. 3098 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3099 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3100 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3101 submatrix (same for all local rows) 3102 . d_nnz - array containing the number of nonzero blocks in the various block rows 3103 of the in diagonal portion of the local (possibly different for each block 3104 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3105 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3106 submatrix (same for all local rows). 3107 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3108 off-diagonal portion of the local submatrix (possibly different for 3109 each block row) or PETSC_NULL. 3110 3111 Output Parameter: 3112 . A - the matrix 3113 3114 Options Database Keys: 3115 + -mat_block_size - size of the blocks to use 3116 - -mat_use_hash_table <fact> 3117 3118 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3119 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3120 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3121 3122 Notes: 3123 If the *_nnz parameter is given then the *_nz parameter is ignored 3124 3125 A nonzero block is any block that as 1 or more nonzeros in it 3126 3127 The user MUST specify either the local or global matrix dimensions 3128 (possibly both). 3129 3130 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3131 than it must be used on all processors that share the object for that argument. 3132 3133 Storage Information: 3134 For a square global matrix we define each processor's diagonal portion 3135 to be its local rows and the corresponding columns (a square submatrix); 3136 each processor's off-diagonal portion encompasses the remainder of the 3137 local matrix (a rectangular submatrix). 3138 3139 The user can specify preallocated storage for the diagonal part of 3140 the local submatrix with either d_nz or d_nnz (not both). Set 3141 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3142 memory allocation. Likewise, specify preallocated storage for the 3143 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3144 3145 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3146 the figure below we depict these three local rows and all columns (0-11). 3147 3148 .vb 3149 0 1 2 3 4 5 6 7 8 9 10 11 3150 ------------------- 3151 row 3 | o o o d d d o o o o o o 3152 row 4 | o o o d d d o o o o o o 3153 row 5 | o o o d d d o o o o o o 3154 ------------------- 3155 .ve 3156 3157 Thus, any entries in the d locations are stored in the d (diagonal) 3158 submatrix, and any entries in the o locations are stored in the 3159 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3160 stored simply in the MATSEQBAIJ format for compressed row storage. 
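
   For the three rows depicted above, read with an assumed block size of 3 (so that
   point rows 3-5 form one block row and columns 0-11 form four block columns), a
   corresponding creation call could look like the sketch below. It assumes four
   processes, each owning one block row of three point rows, and the counts d_nz=1
   and o_nz=3 are illustrative, not prescriptive:

.vb
   ierr = MatCreateMPIBAIJ(comm,3,3,3,12,12,1,PETSC_NULL,3,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
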
3161 3162 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3163 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3164 In general, for PDE problems in which most nonzeros are near the diagonal, 3165 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3166 or you will get TERRIBLE performance; see the users' manual chapter on 3167 matrices. 3168 3169 Level: intermediate 3170 3171 .keywords: matrix, block, aij, compressed row, sparse, parallel 3172 3173 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3174 @*/ 3175 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3176 { 3177 PetscErrorCode ierr; 3178 PetscMPIInt size; 3179 3180 PetscFunctionBegin; 3181 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3182 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3183 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3184 if (size > 1) { 3185 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3186 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3187 } else { 3188 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3189 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3190 } 3191 PetscFunctionReturn(0); 3192 } 3193 3194 #undef __FUNCT__ 3195 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3196 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3197 { 3198 Mat mat; 3199 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3200 PetscErrorCode ierr; 3201 PetscInt len=0; 3202 3203 PetscFunctionBegin; 3204 *newmat = 0; 3205 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3206 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3207 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3208 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3209 3210 mat->factor = matin->factor; 3211 mat->preallocated = PETSC_TRUE; 3212 mat->assembled = PETSC_TRUE; 3213 mat->insertmode = NOT_SET_VALUES; 3214 3215 a = (Mat_MPIBAIJ*)mat->data; 3216 mat->rmap->bs = matin->rmap->bs; 3217 a->bs2 = oldmat->bs2; 3218 a->mbs = oldmat->mbs; 3219 a->nbs = oldmat->nbs; 3220 a->Mbs = oldmat->Mbs; 3221 a->Nbs = oldmat->Nbs; 3222 3223 ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr); 3224 ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr); 3225 3226 a->size = oldmat->size; 3227 a->rank = oldmat->rank; 3228 a->donotstash = oldmat->donotstash; 3229 a->roworiented = oldmat->roworiented; 3230 a->rowindices = 0; 3231 a->rowvalues = 0; 3232 a->getrowactive = PETSC_FALSE; 3233 a->barray = 0; 3234 a->rstartbs = oldmat->rstartbs; 3235 a->rendbs = oldmat->rendbs; 3236 a->cstartbs = oldmat->cstartbs; 3237 a->cendbs = oldmat->cendbs; 3238 3239 /* hash table stuff */ 3240 a->ht = 0; 3241 a->hd = 0; 3242 a->ht_size = 0; 3243 a->ht_flag = oldmat->ht_flag; 3244 a->ht_fact = oldmat->ht_fact; 3245 a->ht_total_ct = 0; 3246 a->ht_insert_ct = 0; 3247 3248 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3249 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 3250 ierr = 
MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3251 if (oldmat->colmap) { 3252 #if defined (PETSC_USE_CTABLE) 3253 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3254 #else 3255 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3256 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3257 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3258 #endif 3259 } else a->colmap = 0; 3260 3261 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3262 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3263 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3264 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3265 } else a->garray = 0; 3266 3267 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3268 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3269 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3270 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3271 3272 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3273 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3274 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3275 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3276 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3277 *newmat = mat; 3278 3279 PetscFunctionReturn(0); 3280 } 3281 3282 #include "petscsys.h" 3283 3284 #undef __FUNCT__ 3285 #define __FUNCT__ "MatLoad_MPIBAIJ" 3286 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 3287 { 3288 Mat A; 3289 PetscErrorCode ierr; 3290 int fd; 3291 PetscInt i,nz,j,rstart,rend; 3292 PetscScalar *vals,*buf; 3293 MPI_Comm comm = ((PetscObject)viewer)->comm; 3294 MPI_Status status; 3295 PetscMPIInt rank,size,maxnz; 3296 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3297 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3298 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3299 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3300 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3301 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3302 3303 PetscFunctionBegin; 3304 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3305 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3306 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3307 3308 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3309 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3310 if (!rank) { 3311 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3312 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3313 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3314 } 3315 3316 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3317 M = header[1]; N = header[2]; 3318 3319 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3320 3321 /* 3322 This code adds extra rows to make sure the number of rows is 3323 divisible by the blocksize 3324 */ 3325 Mbs = M/bs; 3326 extra_rows = bs - M + bs*Mbs; 3327 if (extra_rows == bs) extra_rows = 0; 3328 else Mbs++; 3329 if (extra_rows && !rank) { 3330 ierr 
= PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3331 } 3332 3333 /* determine ownership of all rows */ 3334 mbs = Mbs/size + ((Mbs % size) > rank); 3335 m = mbs*bs; 3336 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3337 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3338 3339 /* process 0 needs enough room for process with most rows */ 3340 if (!rank) { 3341 mmax = rowners[1]; 3342 for (i=2; i<size; i++) { 3343 mmax = PetscMax(mmax,rowners[i]); 3344 } 3345 mmax*=bs; 3346 } else mmax = m; 3347 3348 rowners[0] = 0; 3349 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3350 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3351 rstart = rowners[rank]; 3352 rend = rowners[rank+1]; 3353 3354 /* distribute row lengths to all processors */ 3355 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3356 if (!rank) { 3357 mend = m; 3358 if (size == 1) mend = mend - extra_rows; 3359 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3360 for (j=mend; j<m; j++) locrowlens[j] = 1; 3361 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3362 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3363 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3364 for (j=0; j<m; j++) { 3365 procsnz[0] += locrowlens[j]; 3366 } 3367 for (i=1; i<size; i++) { 3368 mend = browners[i+1] - browners[i]; 3369 if (i == size-1) mend = mend - extra_rows; 3370 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3371 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3372 /* calculate the number of nonzeros on each processor */ 3373 for (j=0; j<browners[i+1]-browners[i]; j++) { 3374 procsnz[i] += rowlengths[j]; 3375 } 3376 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3377 } 3378 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3379 } else { 3380 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3381 } 3382 3383 if (!rank) { 3384 /* determine max buffer needed and allocate it */ 3385 maxnz = procsnz[0]; 3386 for (i=1; i<size; i++) { 3387 maxnz = PetscMax(maxnz,procsnz[i]); 3388 } 3389 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3390 3391 /* read in my part of the matrix column indices */ 3392 nz = procsnz[0]; 3393 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3394 mycols = ibuf; 3395 if (size == 1) nz -= extra_rows; 3396 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3397 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3398 3399 /* read in every ones (except the last) and ship off */ 3400 for (i=1; i<size-1; i++) { 3401 nz = procsnz[i]; 3402 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3403 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3404 } 3405 /* read in the stuff for the last proc */ 3406 if (size != 1) { 3407 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3408 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3409 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3410 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3411 } 3412 ierr = PetscFree(cols);CHKERRQ(ierr); 3413 } else { 3414 /* determine buffer space needed for message */ 3415 nz = 0; 3416 for (i=0; i<m; i++) { 3417 nz += locrowlens[i]; 3418 } 3419 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3420 
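/* On the non-root processes: nz (accumulated above from locrowlens) is the total number of locally owned nonzeros, and ibuf is sized to receive their column indices from process 0 in the MPI_Recv below. */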
mycols = ibuf; 3421 /* receive message of column indices*/ 3422 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3423 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3424 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3425 } 3426 3427 /* loop over local rows, determining number of off diagonal entries */ 3428 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3429 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3430 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3431 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3432 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3433 rowcount = 0; nzcount = 0; 3434 for (i=0; i<mbs; i++) { 3435 dcount = 0; 3436 odcount = 0; 3437 for (j=0; j<bs; j++) { 3438 kmax = locrowlens[rowcount]; 3439 for (k=0; k<kmax; k++) { 3440 tmp = mycols[nzcount++]/bs; 3441 if (!mask[tmp]) { 3442 mask[tmp] = 1; 3443 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3444 else masked1[dcount++] = tmp; 3445 } 3446 } 3447 rowcount++; 3448 } 3449 3450 dlens[i] = dcount; 3451 odlens[i] = odcount; 3452 3453 /* zero out the mask elements we set */ 3454 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3455 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3456 } 3457 3458 /* create our matrix */ 3459 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3460 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3461 ierr = MatSetType(A,type);CHKERRQ(ierr); 3462 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3463 3464 if (!rank) { 3465 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3466 /* read in my part of the matrix numerical values */ 3467 nz = procsnz[0]; 3468 vals = buf; 3469 mycols = ibuf; 3470 if (size == 1) nz -= extra_rows; 3471 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3472 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3473 3474 /* insert into matrix */ 3475 jj = rstart*bs; 3476 for (i=0; i<m; i++) { 3477 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3478 mycols += locrowlens[i]; 3479 vals += locrowlens[i]; 3480 jj++; 3481 } 3482 /* read in other processors (except the last one) and ship out */ 3483 for (i=1; i<size-1; i++) { 3484 nz = procsnz[i]; 3485 vals = buf; 3486 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3487 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3488 } 3489 /* the last proc */ 3490 if (size != 1){ 3491 nz = procsnz[i] - extra_rows; 3492 vals = buf; 3493 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3494 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3495 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3496 } 3497 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3498 } else { 3499 /* receive numeric values */ 3500 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3501 3502 /* receive message of values*/ 3503 vals = buf; 3504 mycols = ibuf; 3505 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3506 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3507 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3508 3509 /* insert into matrix */ 3510 jj = rstart*bs; 3511 for (i=0; i<m; i++) { 3512 ierr = 
MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3513 mycols += locrowlens[i]; 3514 vals += locrowlens[i]; 3515 jj++; 3516 } 3517 } 3518 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3519 ierr = PetscFree(buf);CHKERRQ(ierr); 3520 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3521 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3522 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3523 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3524 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3525 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3526 3527 *newmat = A; 3528 PetscFunctionReturn(0); 3529 } 3530 3531 #undef __FUNCT__ 3532 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3533 /*@ 3534 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3535 3536 Input Parameters: 3537 + mat - the matrix 3538 - fact - the factor used to compute the hash table size 3539 3540 Collective on Mat 3541 3542 Level: advanced 3543 3544 Notes: 3545 This can also be set by the command line option: -mat_use_hash_table <fact> 3546 3547 .keywords: matrix, hashtable, factor, HT 3548 3549 .seealso: MatSetOption() 3550 @*/ 3551 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3552 { 3553 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3554 3555 PetscFunctionBegin; 3556 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3557 if (f) { 3558 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3559 } 3560 PetscFunctionReturn(0); 3561 } 3562 3563 EXTERN_C_BEGIN 3564 #undef __FUNCT__ 3565 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3566 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3567 { 3568 Mat_MPIBAIJ *baij; 3569 3570 PetscFunctionBegin; 3571 baij = (Mat_MPIBAIJ*)mat->data; 3572 baij->ht_fact = fact; 3573 PetscFunctionReturn(0); 3574 } 3575 EXTERN_C_END 3576 3577 #undef __FUNCT__ 3578 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3579 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3580 { 3581 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3582 PetscFunctionBegin; 3583 *Ad = a->A; 3584 *Ao = a->B; 3585 *colmap = a->garray; 3586 PetscFunctionReturn(0); 3587 } 3588 3589 /* 3590 Special version for direct calls from Fortran (to eliminate two function call overheads) 3591 */ 3592 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3593 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3594 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3595 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3596 #endif 3597 3598 #undef __FUNCT__ 3599 #define __FUNCT__ "matmpibaijsetvaluesblocked" 3600 /*@C 3601 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3602 3603 Collective on Mat 3604 3605 Input Parameters: 3606 + mat - the matrix 3607 . min - number of input rows 3608 . im - input rows 3609 . nin - number of input columns 3610 . in - input columns 3611 . v - numerical values input 3612 - addvin - INSERT_VALUES or ADD_VALUES 3613 3614 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
3615 3616 Level: advanced 3617 3618 .seealso: MatSetValuesBlocked() 3619 @*/ 3620 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3621 { 3622 /* convert input arguments to C version */ 3623 Mat mat = *matin; 3624 PetscInt m = *min, n = *nin; 3625 InsertMode addv = *addvin; 3626 3627 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3628 const MatScalar *value; 3629 MatScalar *barray=baij->barray; 3630 PetscTruth roworiented = baij->roworiented; 3631 PetscErrorCode ierr; 3632 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3633 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3634 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3635 3636 PetscFunctionBegin; 3637 /* tasks normally handled by MatSetValuesBlocked() */ 3638 if (mat->insertmode == NOT_SET_VALUES) { 3639 mat->insertmode = addv; 3640 } 3641 #if defined(PETSC_USE_DEBUG) 3642 else if (mat->insertmode != addv) { 3643 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3644 } 3645 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3646 #endif 3647 if (mat->assembled) { 3648 mat->was_assembled = PETSC_TRUE; 3649 mat->assembled = PETSC_FALSE; 3650 } 3651 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3652 3653 3654 if(!barray) { 3655 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3656 baij->barray = barray; 3657 } 3658 3659 if (roworiented) { 3660 stepval = (n-1)*bs; 3661 } else { 3662 stepval = (m-1)*bs; 3663 } 3664 for (i=0; i<m; i++) { 3665 if (im[i] < 0) continue; 3666 #if defined(PETSC_USE_DEBUG) 3667 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3668 #endif 3669 if (im[i] >= rstart && im[i] < rend) { 3670 row = im[i] - rstart; 3671 for (j=0; j<n; j++) { 3672 /* If NumCol = 1 then a copy is not required */ 3673 if ((roworiented) && (n == 1)) { 3674 barray = (MatScalar*)v + i*bs2; 3675 } else if((!roworiented) && (m == 1)) { 3676 barray = (MatScalar*)v + j*bs2; 3677 } else { /* Here a copy is required */ 3678 if (roworiented) { 3679 value = v + i*(stepval+bs)*bs + j*bs; 3680 } else { 3681 value = v + j*(stepval+bs)*bs + i*bs; 3682 } 3683 for (ii=0; ii<bs; ii++,value+=stepval) { 3684 for (jj=0; jj<bs; jj++) { 3685 *barray++ = *value++; 3686 } 3687 } 3688 barray -=bs2; 3689 } 3690 3691 if (in[j] >= cstart && in[j] < cend){ 3692 col = in[j] - cstart; 3693 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3694 } 3695 else if (in[j] < 0) continue; 3696 #if defined(PETSC_USE_DEBUG) 3697 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 3698 #endif 3699 else { 3700 if (mat->was_assembled) { 3701 if (!baij->colmap) { 3702 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3703 } 3704 3705 #if defined(PETSC_USE_DEBUG) 3706 #if defined (PETSC_USE_CTABLE) 3707 { PetscInt data; 3708 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3709 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3710 } 3711 #else 3712 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3713 #endif 3714 #endif 3715 #if defined (PETSC_USE_CTABLE) 3716 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3717 col = (col - 1)/bs; 3718 #else 3719 col = (baij->colmap[in[j]] - 1)/bs; 3720 #endif 3721 if (col < 0 && 
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3722 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3723 col = in[j]; 3724 } 3725 } 3726 else col = in[j]; 3727 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3728 } 3729 } 3730 } else { 3731 if (!baij->donotstash) { 3732 if (roworiented) { 3733 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3734 } else { 3735 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3736 } 3737 } 3738 } 3739 } 3740 3741 /* task normally handled by MatSetValuesBlocked() */ 3742 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3743 PetscFunctionReturn(0); 3744 } 3745
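/*
   Illustrative usage sketch (kept in a comment; not compiled as part of this file): the calls
   below follow the MatCreate()/MatSetType()/Mat*SetPreallocation() paradigm recommended in the
   man pages above, mirroring what MatCreateMPIBAIJ() does internally.  The variable names
   (A, bs, mlocal, row, col, vblock) and all sizes, block sizes, and nonzero estimates are
   hypothetical placeholders, not values required by the library.

      Mat            A;
      PetscErrorCode ierr;
      PetscInt       bs = 2, mlocal = 10;             local rows = local columns; must be divisible by bs
      PetscInt       row = 0, col = 0;                global block row and block column indices
      PetscScalar    vblock[4] = {1.0,2.0,3.0,4.0};   one bs*bs block of values (row oriented by default)

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);   5 diagonal and 2 off-diagonal block nonzeros per block row
      ierr = MatSeqBAIJSetPreallocation(A,bs,5,PETSC_NULL);CHKERRQ(ierr);                ignored unless the type ends up being MATSEQBAIJ
      ierr = MatSetValuesBlocked(A,1,&row,1,&col,vblock,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/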