1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define CHUNKSIZE 10 111 112 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 113 { \ 114 \ 115 brow = row/bs; \ 116 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 117 rmax = aimax[brow]; nrow = ailen[brow]; \ 118 bcol = col/bs; \ 119 ridx = row % bs; cidx = col % bs; \ 120 low = 0; high = nrow; \ 121 while (high-low > 3) { \ 122 t = (low+high)/2; \ 123 if (rp[t] > bcol) high = t; \ 124 else low = t; \ 125 } \ 126 for (_i=low; _i<high; _i++) { \ 127 if (rp[_i] > bcol) break; \ 128 if (rp[_i] == bcol) { \ 129 bap = ap + bs2*_i + bs*cidx + ridx; \ 130 if (addv == ADD_VALUES) *bap += value; \ 131 else *bap = value; \ 132 goto a_noinsert; \ 133 } \ 134 } \ 135 if (a->nonew == 1) goto a_noinsert; \ 136 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 137 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 138 N = nrow++ - 1; \ 139 /* shift up all the later entries in this row */ \ 140 for (ii=N; ii>=_i; ii--) { \ 141 rp[ii+1] = rp[ii]; \ 142 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 143 } \ 144 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 145 rp[_i] = bcol; \ 146 ap[bs2*_i + bs*cidx + ridx] = value; \ 147 a_noinsert:; \ 148 ailen[brow] = nrow; \ 149 } 150 151 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 152 { \ 153 brow = row/bs; \ 154 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 155 rmax = bimax[brow]; nrow = bilen[brow]; \ 156 bcol = col/bs; \ 157 ridx = row % bs; cidx = col % bs; \ 158 low = 0; high = nrow; \ 159 while (high-low > 3) { \ 160 t = (low+high)/2; \ 161 if (rp[t] > bcol) high = t; \ 162 else low = t; \ 163 } \ 164 for (_i=low; _i<high; _i++) { \ 165 if (rp[_i] > bcol) break; \ 166 if (rp[_i] == bcol) { \ 167 bap = ap + bs2*_i + bs*cidx + ridx; \ 168 if (addv == ADD_VALUES) *bap += value; \ 169 else *bap = value; \ 170 goto b_noinsert; \ 171 } \ 172 } \ 173 if (b->nonew == 1) goto b_noinsert; \ 174 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 175 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 176 CHKMEMQ;\ 177 N = nrow++ - 1; \ 178 /* shift up all the later entries in this row */ \ 179 for (ii=N; ii>=_i; ii--) { \ 180 rp[ii+1] = rp[ii]; \ 181 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 182 } \ 183 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 184 rp[_i] = bcol; \ 185 ap[bs2*_i + bs*cidx + ridx] = value; \ 186 b_noinsert:; \ 187 bilen[brow] = nrow; \ 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatSetValues_MPIBAIJ" 192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 193 { 194 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 195 MatScalar value; 196 PetscTruth roworiented = baij->roworiented; 197 PetscErrorCode ierr; 198 PetscInt i,j,row,col; 199 PetscInt rstart_orig=mat->rmap->rstart; 200 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 201 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 202 203 /* Some Variables required in the macro */ 204 Mat A = baij->A; 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 206 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 207 MatScalar *aa=a->a; 208 209 Mat B = baij->B; 210 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 211 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 212 MatScalar *ba=b->a; 213 214 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 215 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 216 MatScalar *ap,*bap; 217 218 PetscFunctionBegin; 219 if (v) PetscValidScalarPointer(v,6); 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_DEBUG) 223 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 224 #endif 225 if (im[i] >= rstart_orig && im[i] < rend_orig) { 226 row = im[i] - rstart_orig; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart_orig && in[j] < cend_orig){ 229 col = in[j] - cstart_orig; 230 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 231 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_DEBUG) 235 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!baij->colmap) { 240 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 244 col = col - 1; 245 #else 246 col = baij->colmap[in[j]/bs] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 249 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 252 B = baij->B; 253 b = (Mat_SeqBAIJ*)(B)->data; 254 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 255 ba=b->a; 256 } else col += in[j]%bs; 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!baij->donotstash) { 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray=baij->barray; 283 PetscTruth roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if(!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) { 296 stepval = (n-1)*bs; 297 } else { 298 stepval = (m-1)*bs; 299 } 300 for (i=0; i<m; i++) { 301 if (im[i] < 0) continue; 302 #if defined(PETSC_USE_DEBUG) 303 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 304 #endif 305 if (im[i] >= rstart && im[i] < rend) { 306 row = im[i] - rstart; 307 for (j=0; j<n; j++) { 308 /* If NumCol = 1 then a copy is not required */ 309 if ((roworiented) && (n == 1)) { 310 barray = (MatScalar*)v + i*bs2; 311 } else if((!roworiented) && (m == 1)) { 312 barray = (MatScalar*)v + j*bs2; 313 } else { /* Here a copy is required */ 314 if (roworiented) { 315 value = v + i*(stepval+bs)*bs + j*bs; 316 } else { 317 value = v + j*(stepval+bs)*bs + i*bs; 318 } 319 for (ii=0; ii<bs; ii++,value+=stepval) { 320 for (jj=0; jj<bs; jj++) { 321 *barray++ = *value++; 322 } 323 } 324 barray -=bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (!baij->donotstash) { 368 if (roworiented) { 369 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } else { 371 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 372 } 373 } 374 } 375 } 376 PetscFunctionReturn(0); 377 } 378 379 #define HASH_KEY 0.6180339887 380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 386 { 387 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 388 PetscTruth roworiented = baij->roworiented; 389 PetscErrorCode ierr; 390 PetscInt i,j,row,col; 391 PetscInt rstart_orig=mat->rmap->rstart; 392 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 393 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 394 PetscReal tmp; 395 MatScalar **HD = baij->hd,value; 396 #if defined(PETSC_USE_DEBUG) 397 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 398 #endif 399 400 PetscFunctionBegin; 401 if (v) PetscValidScalarPointer(v,6); 402 for (i=0; i<m; i++) { 403 #if defined(PETSC_USE_DEBUG) 404 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 405 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 406 #endif 407 row = im[i]; 408 if (row >= rstart_orig && row < rend_orig) { 409 for (j=0; j<n; j++) { 410 col = in[j]; 411 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412 /* Look up PetscInto the Hash Table */ 413 key = (row/bs)*Nbs+(col/bs)+1; 414 h1 = HASH(size,key,tmp); 415 416 417 idx = h1; 418 #if defined(PETSC_USE_DEBUG) 419 insert_ct++; 420 total_ct++; 421 if (HT[idx] != key) { 422 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == size) { 424 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 425 if (idx == h1) { 426 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 427 } 428 } 429 } 430 #else 431 if (HT[idx] != key) { 432 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 433 if (idx == size) { 434 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 435 if (idx == h1) { 436 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 437 } 438 } 439 } 440 #endif 441 /* A HASH table entry is found, so insert the values at the correct address */ 442 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 443 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 444 } 445 } else { 446 if (!baij->donotstash) { 447 if (roworiented) { 448 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 449 } else { 450 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 451 } 452 } 453 } 454 } 455 #if defined(PETSC_USE_DEBUG) 456 baij->ht_total_ct = total_ct; 457 baij->ht_insert_ct = insert_ct; 458 #endif 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 PetscTruth roworiented = baij->roworiented; 468 PetscErrorCode ierr; 469 PetscInt i,j,ii,jj,row,col; 470 PetscInt rstart=baij->rstartbs; 471 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 472 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 473 PetscReal tmp; 474 MatScalar **HD = baij->hd,*baij_a; 475 const PetscScalar *v_t,*value; 476 #if defined(PETSC_USE_DEBUG) 477 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 478 #endif 479 480 PetscFunctionBegin; 481 482 if (roworiented) { 483 stepval = (n-1)*bs; 484 } else { 485 stepval = (m-1)*bs; 486 } 487 for (i=0; i<m; i++) { 488 #if defined(PETSC_USE_DEBUG) 489 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 490 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 491 #endif 492 row = im[i]; 493 v_t = v + i*nbs2; 494 if (row >= rstart && row < rend) { 495 for (j=0; j<n; j++) { 496 col = in[j]; 497 498 /* Look up into the Hash Table */ 499 key = row*Nbs+col+1; 500 h1 = HASH(size,key,tmp); 501 502 idx = h1; 503 #if defined(PETSC_USE_DEBUG) 504 total_ct++; 505 insert_ct++; 506 if (HT[idx] != key) { 507 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == size) { 509 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 510 if (idx == h1) { 511 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 512 } 513 } 514 } 515 #else 516 if (HT[idx] != key) { 517 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 518 if (idx == size) { 519 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 520 if (idx == h1) { 521 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 522 } 523 } 524 } 525 #endif 526 baij_a = HD[idx]; 527 if (roworiented) { 528 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 529 /* value = v + (i*(stepval+bs)+j)*bs; */ 530 value = v_t; 531 v_t += bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval) { 534 for (jj=ii; jj<bs2; jj+=bs) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval) { 540 for (jj=ii; jj<bs2; jj+=bs) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } else { 546 value = v + j*(stepval+bs)*bs + i*bs; 547 if (addv == ADD_VALUES) { 548 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 549 for (jj=0; jj<bs; jj++) { 550 baij_a[jj] += *value++; 551 } 552 } 553 } else { 554 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 555 for (jj=0; jj<bs; jj++) { 556 baij_a[jj] = *value++; 557 } 558 } 559 } 560 } 561 } 562 } else { 563 if (!baij->donotstash) { 564 if (roworiented) { 565 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } else { 567 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 568 } 569 } 570 } 571 } 572 #if defined(PETSC_USE_DEBUG) 573 baij->ht_total_ct = total_ct; 574 baij->ht_insert_ct = insert_ct; 575 #endif 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatGetValues_MPIBAIJ" 581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 582 { 583 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 584 PetscErrorCode ierr; 585 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 586 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 587 588 PetscFunctionBegin; 589 for (i=0; i<m; i++) { 590 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 591 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 592 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 593 row = idxm[i] - bsrstart; 594 for (j=0; j<n; j++) { 595 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 596 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 597 if (idxn[j] >= bscstart && idxn[j] < bscend){ 598 col = idxn[j] - bscstart; 599 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 600 } else { 601 if (!baij->colmap) { 602 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 603 } 604 #if defined (PETSC_USE_CTABLE) 605 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 606 data --; 607 #else 608 data = baij->colmap[idxn[j]/bs]-1; 609 #endif 610 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 611 else { 612 col = data + idxn[j]%bs; 613 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 614 } 615 } 616 } 617 } else { 618 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatNorm_MPIBAIJ" 626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 627 { 628 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 629 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 630 PetscErrorCode ierr; 631 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 632 PetscReal sum = 0.0; 633 MatScalar *v; 634 635 PetscFunctionBegin; 636 if (baij->size == 1) { 637 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 638 } else { 639 if (type == NORM_FROBENIUS) { 640 v = amat->a; 641 nz = amat->nz*bs2; 642 for (i=0; i<nz; i++) { 643 #if defined(PETSC_USE_COMPLEX) 644 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 645 #else 646 sum += (*v)*(*v); v++; 647 #endif 648 } 649 v = bmat->a; 650 nz = bmat->nz*bs2; 651 for (i=0; i<nz; i++) { 652 #if defined(PETSC_USE_COMPLEX) 653 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 654 #else 655 sum += (*v)*(*v); v++; 656 #endif 657 } 658 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 *nrm = sqrt(*nrm); 660 } else if (type == NORM_1) { /* max column sum */ 661 PetscReal *tmp,*tmp2; 662 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 663 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 664 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 665 v = amat->a; jj = amat->j; 666 for (i=0; i<amat->nz; i++) { 667 for (j=0; j<bs; j++){ 668 col = bs*(cstart + *jj) + j; /* column index */ 669 for (row=0; row<bs; row++){ 670 tmp[col] += PetscAbsScalar(*v); v++; 671 } 672 } 673 jj++; 674 } 675 v = bmat->a; jj = bmat->j; 676 for (i=0; i<bmat->nz; i++) { 677 for (j=0; j<bs; j++){ 678 col = bs*garray[*jj] + j; 679 for (row=0; row<bs; row++){ 680 tmp[col] += PetscAbsScalar(*v); v++; 681 } 682 } 683 jj++; 684 } 685 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 686 *nrm = 0.0; 687 for (j=0; j<mat->cmap->N; j++) { 688 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 689 } 690 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 691 } else if (type == NORM_INFINITY) { /* max row sum */ 692 PetscReal *sums; 693 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 694 sum = 0.0; 695 for (j=0; j<amat->mbs; j++) { 696 for (row=0; row<bs; row++) sums[row] = 0.0; 697 v = amat->a + bs2*amat->i[j]; 698 nz = amat->i[j+1]-amat->i[j]; 699 for (i=0; i<nz; i++) { 700 for (col=0; col<bs; col++){ 701 for (row=0; row<bs; row++){ 702 sums[row] += PetscAbsScalar(*v); v++; 703 } 704 } 705 } 706 v = bmat->a + bs2*bmat->i[j]; 707 nz = bmat->i[j+1]-bmat->i[j]; 708 for (i=0; i<nz; i++) { 709 for (col=0; col<bs; col++){ 710 for (row=0; row<bs; row++){ 711 sums[row] += PetscAbsScalar(*v); v++; 712 } 713 } 714 } 715 for (row=0; row<bs; row++){ 716 if (sums[row] > sum) sum = sums[row]; 717 } 718 } 719 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 720 ierr = PetscFree(sums);CHKERRQ(ierr); 721 } else { 722 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 } 725 PetscFunctionReturn(0); 726 } 727 728 /* 729 Creates the hash table, and sets the table 730 This table is created only once. 731 If new entried need to be added to the matrix 732 then the hash table has to be destroyed and 733 recreated. 734 */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 737 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 738 { 739 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 740 Mat A = baij->A,B=baij->B; 741 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 742 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 743 PetscErrorCode ierr; 744 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 745 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 746 PetscInt *HT,key; 747 MatScalar **HD; 748 PetscReal tmp; 749 #if defined(PETSC_USE_INFO) 750 PetscInt ct=0,max=0; 751 #endif 752 753 PetscFunctionBegin; 754 if (baij->ht) PetscFunctionReturn(0); 755 756 baij->ht_size = (PetscInt)(factor*nz); 757 ht_size = baij->ht_size; 758 759 /* Allocate Memory for Hash Table */ 760 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 761 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 762 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 763 HD = baij->hd; 764 HT = baij->ht; 765 766 /* Loop Over A */ 767 for (i=0; i<a->mbs; i++) { 768 for (j=ai[i]; j<ai[i+1]; j++) { 769 row = i+rstart; 770 col = aj[j]+cstart; 771 772 key = row*Nbs + col + 1; 773 h1 = HASH(ht_size,key,tmp); 774 for (k=0; k<ht_size; k++){ 775 if (!HT[(h1+k)%ht_size]) { 776 HT[(h1+k)%ht_size] = key; 777 HD[(h1+k)%ht_size] = a->a + j*bs2; 778 break; 779 #if defined(PETSC_USE_INFO) 780 } else { 781 ct++; 782 #endif 783 } 784 } 785 #if defined(PETSC_USE_INFO) 786 if (k> max) max = k; 787 #endif 788 } 789 } 790 /* Loop Over B */ 791 for (i=0; i<b->mbs; i++) { 792 for (j=bi[i]; j<bi[i+1]; j++) { 793 row = i+rstart; 794 col = garray[bj[j]]; 795 key = row*Nbs + col + 1; 796 h1 = HASH(ht_size,key,tmp); 797 for (k=0; k<ht_size; k++){ 798 if (!HT[(h1+k)%ht_size]) { 799 HT[(h1+k)%ht_size] = key; 800 HD[(h1+k)%ht_size] = b->a + j*bs2; 801 break; 802 #if defined(PETSC_USE_INFO) 803 } else { 804 ct++; 805 #endif 806 } 807 } 808 #if defined(PETSC_USE_INFO) 809 if (k> max) max = k; 810 #endif 811 } 812 } 813 814 /* Print Summary */ 815 #if defined(PETSC_USE_INFO) 816 for (i=0,j=0; i<ht_size; i++) { 817 if (HT[i]) {j++;} 818 } 819 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 820 #endif 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 826 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 827 { 828 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 829 PetscErrorCode ierr; 830 PetscInt nstash,reallocs; 831 InsertMode addv; 832 833 PetscFunctionBegin; 834 if (baij->donotstash) { 835 PetscFunctionReturn(0); 836 } 837 838 /* make sure all processors are either in INSERTMODE or ADDMODE */ 839 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 840 if (addv == (ADD_VALUES|INSERT_VALUES)) { 841 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 842 } 843 mat->insertmode = addv; /* in case this processor had no cache */ 844 845 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 846 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 847 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 848 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 849 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 850 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 856 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 857 { 858 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 859 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 860 PetscErrorCode ierr; 861 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 862 PetscInt *row,*col; 863 PetscTruth r1,r2,r3,other_disassembled; 864 MatScalar *val; 865 InsertMode addv = mat->insertmode; 866 PetscMPIInt n; 867 868 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 869 PetscFunctionBegin; 870 if (!baij->donotstash) { 871 while (1) { 872 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 873 if (!flg) break; 874 875 for (i=0; i<n;) { 876 /* Now identify the consecutive vals belonging to the same row */ 877 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 878 if (j < n) ncols = j-i; 879 else ncols = n-i; 880 /* Now assemble all these values with a single function call */ 881 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 882 i = j; 883 } 884 } 885 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 886 /* Now process the block-stash. Since the values are stashed column-oriented, 887 set the roworiented flag to column oriented, and after MatSetValues() 888 restore the original flags */ 889 r1 = baij->roworiented; 890 r2 = a->roworiented; 891 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 892 baij->roworiented = PETSC_FALSE; 893 a->roworiented = PETSC_FALSE; 894 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 895 while (1) { 896 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 897 if (!flg) break; 898 899 for (i=0; i<n;) { 900 /* Now identify the consecutive vals belonging to the same row */ 901 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 902 if (j < n) ncols = j-i; 903 else ncols = n-i; 904 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 905 i = j; 906 } 907 } 908 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 909 baij->roworiented = r1; 910 a->roworiented = r2; 911 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 912 } 913 914 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 915 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 916 917 /* determine if any processor has disassembled, if so we must 918 also disassemble ourselfs, in order that we may reassemble. */ 919 /* 920 if nonzero structure of submatrix B cannot change then we know that 921 no processor disassembled thus we can skip this stuff 922 */ 923 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 924 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 925 if (mat->was_assembled && !other_disassembled) { 926 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 927 } 928 } 929 930 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 931 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 932 } 933 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 934 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 935 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 936 937 #if defined(PETSC_USE_INFO) 938 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 939 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 940 baij->ht_total_ct = 0; 941 baij->ht_insert_ct = 0; 942 } 943 #endif 944 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 945 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 946 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 947 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 948 } 949 950 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 951 baij->rowvalues = 0; 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 957 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 958 { 959 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 960 PetscErrorCode ierr; 961 PetscMPIInt size = baij->size,rank = baij->rank; 962 PetscInt bs = mat->rmap->bs; 963 PetscTruth iascii,isdraw; 964 PetscViewer sviewer; 965 PetscViewerFormat format; 966 967 PetscFunctionBegin; 968 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 969 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 970 if (iascii) { 971 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 972 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 973 MatInfo info; 974 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 975 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 977 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 978 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 979 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 980 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 981 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 982 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 983 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 984 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 985 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } else if (format == PETSC_VIEWER_ASCII_INFO) { 988 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 991 PetscFunctionReturn(0); 992 } 993 } 994 995 if (isdraw) { 996 PetscDraw draw; 997 PetscTruth isnull; 998 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 999 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1000 } 1001 1002 if (size == 1) { 1003 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1004 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1005 } else { 1006 /* assemble the entire matrix onto first processor. */ 1007 Mat A; 1008 Mat_SeqBAIJ *Aloc; 1009 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1010 MatScalar *a; 1011 1012 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1013 /* Perhaps this should be the type of mat? */ 1014 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1015 if (!rank) { 1016 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1017 } else { 1018 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1019 } 1020 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1021 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1022 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1023 1024 /* copy over the A part */ 1025 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1026 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1027 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1028 1029 for (i=0; i<mbs; i++) { 1030 rvals[0] = bs*(baij->rstartbs + i); 1031 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1032 for (j=ai[i]; j<ai[i+1]; j++) { 1033 col = (baij->cstartbs+aj[j])*bs; 1034 for (k=0; k<bs; k++) { 1035 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1036 col++; a += bs; 1037 } 1038 } 1039 } 1040 /* copy over the B part */ 1041 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1042 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1043 for (i=0; i<mbs; i++) { 1044 rvals[0] = bs*(baij->rstartbs + i); 1045 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1046 for (j=ai[i]; j<ai[i+1]; j++) { 1047 col = baij->garray[aj[j]]*bs; 1048 for (k=0; k<bs; k++) { 1049 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1050 col++; a += bs; 1051 } 1052 } 1053 } 1054 ierr = PetscFree(rvals);CHKERRQ(ierr); 1055 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1056 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1057 /* 1058 Everyone has to call to draw the matrix since the graphics waits are 1059 synchronized across all processors that share the PetscDraw object 1060 */ 1061 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1062 if (!rank) { 1063 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1064 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1065 } 1066 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1067 ierr = MatDestroy(A);CHKERRQ(ierr); 1068 } 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatView_MPIBAIJ" 1074 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1075 { 1076 PetscErrorCode ierr; 1077 PetscTruth iascii,isdraw,issocket,isbinary; 1078 1079 PetscFunctionBegin; 1080 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1081 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1082 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1083 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1084 if (iascii || isdraw || issocket || isbinary) { 1085 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1086 } else { 1087 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1088 } 1089 PetscFunctionReturn(0); 1090 } 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1094 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1095 { 1096 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1097 PetscErrorCode ierr; 1098 1099 PetscFunctionBegin; 1100 #if defined(PETSC_USE_LOG) 1101 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1102 #endif 1103 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1104 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1105 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1106 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1107 #if defined (PETSC_USE_CTABLE) 1108 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1109 #else 1110 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1111 #endif 1112 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1113 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1114 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1115 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1116 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1117 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1118 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1119 ierr = PetscFree(baij);CHKERRQ(ierr); 1120 1121 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatMult_MPIBAIJ" 1134 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1135 { 1136 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1137 PetscErrorCode ierr; 1138 PetscInt nt; 1139 1140 PetscFunctionBegin; 1141 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1142 if (nt != A->cmap->n) { 1143 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1144 } 1145 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1146 if (nt != A->rmap->n) { 1147 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1148 } 1149 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1151 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1158 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1159 { 1160 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1165 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1166 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1173 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1174 { 1175 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1176 PetscErrorCode ierr; 1177 PetscTruth merged; 1178 1179 PetscFunctionBegin; 1180 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1181 /* do nondiagonal part */ 1182 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1183 if (!merged) { 1184 /* send it on its way */ 1185 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1186 /* do local part */ 1187 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1188 /* receive remote parts: note this assumes the values are not actually */ 1189 /* inserted in yy until the next line */ 1190 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1191 } else { 1192 /* do local part */ 1193 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1194 /* send it on its way */ 1195 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1196 /* values actually were received in the Begin() but we need to call this nop */ 1197 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1198 } 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1204 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1205 { 1206 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 /* do nondiagonal part */ 1211 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1212 /* send it on its way */ 1213 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1214 /* do local part */ 1215 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1216 /* receive remote parts: note this assumes the values are not actually */ 1217 /* inserted in yy until the next line, which is true for my implementation*/ 1218 /* but is not perhaps always true. */ 1219 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* 1224 This only works correctly for square matrices where the subblock A->A is the 1225 diagonal block 1226 */ 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1229 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1230 { 1231 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1236 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatScale_MPIBAIJ" 1242 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1243 { 1244 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1249 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1255 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1256 { 1257 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1258 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1259 PetscErrorCode ierr; 1260 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1261 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1262 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1263 1264 PetscFunctionBegin; 1265 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1266 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1267 mat->getrowactive = PETSC_TRUE; 1268 1269 if (!mat->rowvalues && (idx || v)) { 1270 /* 1271 allocate enough space to hold information from the longest row. 1272 */ 1273 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1274 PetscInt max = 1,mbs = mat->mbs,tmp; 1275 for (i=0; i<mbs; i++) { 1276 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1277 if (max < tmp) { max = tmp; } 1278 } 1279 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1280 } 1281 lrow = row - brstart; 1282 1283 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1284 if (!v) {pvA = 0; pvB = 0;} 1285 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1286 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1287 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1288 nztot = nzA + nzB; 1289 1290 cmap = mat->garray; 1291 if (v || idx) { 1292 if (nztot) { 1293 /* Sort by increasing column numbers, assuming A and B already sorted */ 1294 PetscInt imark = -1; 1295 if (v) { 1296 *v = v_p = mat->rowvalues; 1297 for (i=0; i<nzB; i++) { 1298 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1299 else break; 1300 } 1301 imark = i; 1302 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1303 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1304 } 1305 if (idx) { 1306 *idx = idx_p = mat->rowindices; 1307 if (imark > -1) { 1308 for (i=0; i<imark; i++) { 1309 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1310 } 1311 } else { 1312 for (i=0; i<nzB; i++) { 1313 if (cmap[cworkB[i]/bs] < cstart) 1314 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1315 else break; 1316 } 1317 imark = i; 1318 } 1319 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1320 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1321 } 1322 } else { 1323 if (idx) *idx = 0; 1324 if (v) *v = 0; 1325 } 1326 } 1327 *nz = nztot; 1328 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1329 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1335 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1336 { 1337 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1338 1339 PetscFunctionBegin; 1340 if (!baij->getrowactive) { 1341 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1342 } 1343 baij->getrowactive = PETSC_FALSE; 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNCT__ 1348 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1349 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1350 { 1351 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1352 PetscErrorCode ierr; 1353 1354 PetscFunctionBegin; 1355 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1356 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1362 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1363 { 1364 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1365 Mat A = a->A,B = a->B; 1366 PetscErrorCode ierr; 1367 PetscReal isend[5],irecv[5]; 1368 1369 PetscFunctionBegin; 1370 info->block_size = (PetscReal)matin->rmap->bs; 1371 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1372 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1373 isend[3] = info->memory; isend[4] = info->mallocs; 1374 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1375 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1376 isend[3] += info->memory; isend[4] += info->mallocs; 1377 if (flag == MAT_LOCAL) { 1378 info->nz_used = isend[0]; 1379 info->nz_allocated = isend[1]; 1380 info->nz_unneeded = isend[2]; 1381 info->memory = isend[3]; 1382 info->mallocs = isend[4]; 1383 } else if (flag == MAT_GLOBAL_MAX) { 1384 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1385 info->nz_used = irecv[0]; 1386 info->nz_allocated = irecv[1]; 1387 info->nz_unneeded = irecv[2]; 1388 info->memory = irecv[3]; 1389 info->mallocs = irecv[4]; 1390 } else if (flag == MAT_GLOBAL_SUM) { 1391 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1392 info->nz_used = irecv[0]; 1393 info->nz_allocated = irecv[1]; 1394 info->nz_unneeded = irecv[2]; 1395 info->memory = irecv[3]; 1396 info->mallocs = irecv[4]; 1397 } else { 1398 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1399 } 1400 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1401 info->fill_ratio_needed = 0; 1402 info->factor_mallocs = 0; 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1408 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1409 { 1410 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1411 PetscErrorCode ierr; 1412 1413 PetscFunctionBegin; 1414 switch (op) { 1415 case MAT_NEW_NONZERO_LOCATIONS: 1416 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1417 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1418 case MAT_KEEP_NONZERO_PATTERN: 1419 case MAT_NEW_NONZERO_LOCATION_ERR: 1420 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1421 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1422 break; 1423 case MAT_ROW_ORIENTED: 1424 a->roworiented = flg; 1425 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1426 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1427 break; 1428 case MAT_NEW_DIAGONALS: 1429 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1430 break; 1431 case MAT_IGNORE_OFF_PROC_ENTRIES: 1432 a->donotstash = flg; 1433 break; 1434 case MAT_USE_HASH_TABLE: 1435 a->ht_flag = flg; 1436 break; 1437 case MAT_SYMMETRIC: 1438 case MAT_STRUCTURALLY_SYMMETRIC: 1439 case MAT_HERMITIAN: 1440 case MAT_SYMMETRY_ETERNAL: 1441 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1442 break; 1443 default: 1444 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1445 } 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1451 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1452 { 1453 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1454 Mat_SeqBAIJ *Aloc; 1455 Mat B; 1456 PetscErrorCode ierr; 1457 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1458 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1459 MatScalar *a; 1460 1461 PetscFunctionBegin; 1462 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1463 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1464 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1465 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1466 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1467 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1468 } else { 1469 B = *matout; 1470 } 1471 1472 /* copy over the A part */ 1473 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1474 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1475 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1476 1477 for (i=0; i<mbs; i++) { 1478 rvals[0] = bs*(baij->rstartbs + i); 1479 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1480 for (j=ai[i]; j<ai[i+1]; j++) { 1481 col = (baij->cstartbs+aj[j])*bs; 1482 for (k=0; k<bs; k++) { 1483 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1484 col++; a += bs; 1485 } 1486 } 1487 } 1488 /* copy over the B part */ 1489 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1490 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1491 for (i=0; i<mbs; i++) { 1492 rvals[0] = bs*(baij->rstartbs + i); 1493 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1494 for (j=ai[i]; j<ai[i+1]; j++) { 1495 col = baij->garray[aj[j]]*bs; 1496 for (k=0; k<bs; k++) { 1497 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1498 col++; a += bs; 1499 } 1500 } 1501 } 1502 ierr = PetscFree(rvals);CHKERRQ(ierr); 1503 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1505 1506 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1507 *matout = B; 1508 } else { 1509 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1516 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1517 { 1518 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1519 Mat a = baij->A,b = baij->B; 1520 PetscErrorCode ierr; 1521 PetscInt s1,s2,s3; 1522 1523 PetscFunctionBegin; 1524 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1525 if (rr) { 1526 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1527 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1528 /* Overlap communication with computation. */ 1529 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 } 1531 if (ll) { 1532 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1533 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1534 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1535 } 1536 /* scale the diagonal block */ 1537 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1538 1539 if (rr) { 1540 /* Do a scatter end and then right scale the off-diagonal block */ 1541 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1542 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1543 } 1544 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1550 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1551 { 1552 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1553 PetscErrorCode ierr; 1554 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1555 PetscInt i,*owners = A->rmap->range; 1556 PetscInt *nprocs,j,idx,nsends,row; 1557 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1558 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1559 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1560 MPI_Comm comm = ((PetscObject)A)->comm; 1561 MPI_Request *send_waits,*recv_waits; 1562 MPI_Status recv_status,*send_status; 1563 #if defined(PETSC_DEBUG) 1564 PetscTruth found = PETSC_FALSE; 1565 #endif 1566 1567 PetscFunctionBegin; 1568 /* first count number of contributors to each processor */ 1569 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1570 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1571 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1572 j = 0; 1573 for (i=0; i<N; i++) { 1574 if (lastidx > (idx = rows[i])) j = 0; 1575 lastidx = idx; 1576 for (; j<size; j++) { 1577 if (idx >= owners[j] && idx < owners[j+1]) { 1578 nprocs[2*j]++; 1579 nprocs[2*j+1] = 1; 1580 owner[i] = j; 1581 #if defined(PETSC_DEBUG) 1582 found = PETSC_TRUE; 1583 #endif 1584 break; 1585 } 1586 } 1587 #if defined(PETSC_DEBUG) 1588 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1589 found = PETSC_FALSE; 1590 #endif 1591 } 1592 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1593 1594 /* inform other processors of number of messages and max length*/ 1595 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1596 1597 /* post receives: */ 1598 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1599 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1600 for (i=0; i<nrecvs; i++) { 1601 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1602 } 1603 1604 /* do sends: 1605 1) starts[i] gives the starting index in svalues for stuff going to 1606 the ith processor 1607 */ 1608 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1609 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1610 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1611 starts[0] = 0; 1612 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1613 for (i=0; i<N; i++) { 1614 svalues[starts[owner[i]]++] = rows[i]; 1615 } 1616 1617 starts[0] = 0; 1618 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1619 count = 0; 1620 for (i=0; i<size; i++) { 1621 if (nprocs[2*i+1]) { 1622 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1623 } 1624 } 1625 ierr = PetscFree(starts);CHKERRQ(ierr); 1626 1627 base = owners[rank]; 1628 1629 /* wait on receives */ 1630 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1631 count = nrecvs; 1632 slen = 0; 1633 while (count) { 1634 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1635 /* unpack receives into our local space */ 1636 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1637 source[imdex] = recv_status.MPI_SOURCE; 1638 lens[imdex] = n; 1639 slen += n; 1640 count--; 1641 } 1642 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1643 1644 /* move the data into the send scatter */ 1645 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1646 count = 0; 1647 for (i=0; i<nrecvs; i++) { 1648 values = rvalues + i*nmax; 1649 for (j=0; j<lens[i]; j++) { 1650 lrows[count++] = values[j] - base; 1651 } 1652 } 1653 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1654 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1655 ierr = PetscFree(owner);CHKERRQ(ierr); 1656 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1657 1658 /* actually zap the local rows */ 1659 /* 1660 Zero the required rows. If the "diagonal block" of the matrix 1661 is square and the user wishes to set the diagonal we use separate 1662 code so that MatSetValues() is not called for each diagonal allocating 1663 new memory, thus calling lots of mallocs and slowing things down. 1664 1665 */ 1666 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1667 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1668 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1669 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1670 } else if (diag != 0.0) { 1671 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1672 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1673 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1674 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1675 } 1676 for (i=0; i<slen; i++) { 1677 row = lrows[i] + rstart_bs; 1678 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1679 } 1680 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 } else { 1683 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1684 } 1685 1686 ierr = PetscFree(lrows);CHKERRQ(ierr); 1687 1688 /* wait on sends */ 1689 if (nsends) { 1690 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1691 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1692 ierr = PetscFree(send_status);CHKERRQ(ierr); 1693 } 1694 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1695 ierr = PetscFree(svalues);CHKERRQ(ierr); 1696 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1702 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1703 { 1704 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1705 PetscErrorCode ierr; 1706 1707 PetscFunctionBegin; 1708 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatEqual_MPIBAIJ" 1716 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1717 { 1718 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1719 Mat a,b,c,d; 1720 PetscTruth flg; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 a = matA->A; b = matA->B; 1725 c = matB->A; d = matB->B; 1726 1727 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1728 if (flg) { 1729 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1730 } 1731 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatCopy_MPIBAIJ" 1737 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1738 { 1739 PetscErrorCode ierr; 1740 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1741 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1742 1743 PetscFunctionBegin; 1744 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1745 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1746 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1747 } else { 1748 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1749 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1756 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1757 { 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1767 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1768 { 1769 PetscErrorCode ierr; 1770 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1771 PetscBLASInt bnz,one=1; 1772 Mat_SeqBAIJ *x,*y; 1773 1774 PetscFunctionBegin; 1775 if (str == SAME_NONZERO_PATTERN) { 1776 PetscScalar alpha = a; 1777 x = (Mat_SeqBAIJ *)xx->A->data; 1778 y = (Mat_SeqBAIJ *)yy->A->data; 1779 bnz = PetscBLASIntCast(x->nz); 1780 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1781 x = (Mat_SeqBAIJ *)xx->B->data; 1782 y = (Mat_SeqBAIJ *)yy->B->data; 1783 bnz = PetscBLASIntCast(x->nz); 1784 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1785 } else { 1786 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1787 } 1788 PetscFunctionReturn(0); 1789 } 1790 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1793 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1794 { 1795 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1796 PetscInt rbs,cbs; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1801 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1802 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1803 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1804 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1805 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1811 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1812 { 1813 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1814 PetscErrorCode ierr; 1815 1816 PetscFunctionBegin; 1817 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1818 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1824 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1825 { 1826 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1827 PetscErrorCode ierr; 1828 1829 PetscFunctionBegin; 1830 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1831 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1837 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1838 { 1839 PetscErrorCode ierr; 1840 IS iscol_local; 1841 PetscInt csize; 1842 1843 PetscFunctionBegin; 1844 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1845 if (call == MAT_REUSE_MATRIX) { 1846 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1847 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1848 } else { 1849 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1850 } 1851 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1852 if (call == MAT_INITIAL_MATRIX) { 1853 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1854 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1861 /* 1862 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1863 in local and then by concatenating the local matrices the end result. 1864 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1865 */ 1866 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1867 { 1868 PetscErrorCode ierr; 1869 PetscMPIInt rank,size; 1870 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1871 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1872 Mat *local,M,Mreuse; 1873 MatScalar *vwork,*aa; 1874 MPI_Comm comm = ((PetscObject)mat)->comm; 1875 Mat_SeqBAIJ *aij; 1876 1877 1878 PetscFunctionBegin; 1879 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1880 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1881 1882 if (call == MAT_REUSE_MATRIX) { 1883 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1884 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1885 local = &Mreuse; 1886 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1887 } else { 1888 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1889 Mreuse = *local; 1890 ierr = PetscFree(local);CHKERRQ(ierr); 1891 } 1892 1893 /* 1894 m - number of local rows 1895 n - number of columns (same on all processors) 1896 rstart - first row in new global matrix generated 1897 */ 1898 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1899 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1900 m = m/bs; 1901 n = n/bs; 1902 1903 if (call == MAT_INITIAL_MATRIX) { 1904 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1905 ii = aij->i; 1906 jj = aij->j; 1907 1908 /* 1909 Determine the number of non-zeros in the diagonal and off-diagonal 1910 portions of the matrix in order to do correct preallocation 1911 */ 1912 1913 /* first get start and end of "diagonal" columns */ 1914 if (csize == PETSC_DECIDE) { 1915 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1916 if (mglobal == n*bs) { /* square matrix */ 1917 nlocal = m; 1918 } else { 1919 nlocal = n/size + ((n % size) > rank); 1920 } 1921 } else { 1922 nlocal = csize/bs; 1923 } 1924 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1925 rstart = rend - nlocal; 1926 if (rank == size - 1 && rend != n) { 1927 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1928 } 1929 1930 /* next, compute all the lengths */ 1931 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1932 olens = dlens + m; 1933 for (i=0; i<m; i++) { 1934 jend = ii[i+1] - ii[i]; 1935 olen = 0; 1936 dlen = 0; 1937 for (j=0; j<jend; j++) { 1938 if (*jj < rstart || *jj >= rend) olen++; 1939 else dlen++; 1940 jj++; 1941 } 1942 olens[i] = olen; 1943 dlens[i] = dlen; 1944 } 1945 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1946 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1947 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1948 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1949 ierr = PetscFree(dlens);CHKERRQ(ierr); 1950 } else { 1951 PetscInt ml,nl; 1952 1953 M = *newmat; 1954 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1955 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1956 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1957 /* 1958 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1959 rather than the slower MatSetValues(). 1960 */ 1961 M->was_assembled = PETSC_TRUE; 1962 M->assembled = PETSC_FALSE; 1963 } 1964 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1965 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1966 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1967 ii = aij->i; 1968 jj = aij->j; 1969 aa = aij->a; 1970 for (i=0; i<m; i++) { 1971 row = rstart/bs + i; 1972 nz = ii[i+1] - ii[i]; 1973 cwork = jj; jj += nz; 1974 vwork = aa; aa += nz; 1975 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1976 } 1977 1978 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1979 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1980 *newmat = M; 1981 1982 /* save submatrix used in processor for next request */ 1983 if (call == MAT_INITIAL_MATRIX) { 1984 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1985 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1986 } 1987 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatPermute_MPIBAIJ" 1993 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1994 { 1995 MPI_Comm comm,pcomm; 1996 PetscInt first,local_size,nrows; 1997 const PetscInt *rows; 1998 PetscMPIInt size; 1999 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2000 PetscErrorCode ierr; 2001 2002 PetscFunctionBegin; 2003 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2004 /* make a collective version of 'rowp' */ 2005 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2006 if (pcomm==comm) { 2007 crowp = rowp; 2008 } else { 2009 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2010 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2011 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2012 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2013 } 2014 /* collect the global row permutation and invert it */ 2015 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2016 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2017 if (pcomm!=comm) { 2018 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2019 } 2020 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2021 /* get the local target indices */ 2022 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2023 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2024 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2025 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2026 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2027 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2028 /* the column permutation is so much easier; 2029 make a local version of 'colp' and invert it */ 2030 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2031 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2032 if (size==1) { 2033 lcolp = colp; 2034 } else { 2035 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2036 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2037 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2038 } 2039 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2040 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2041 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2042 if (size>1) { 2043 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2044 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2045 } 2046 /* now we just get the submatrix */ 2047 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2048 /* clean up */ 2049 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2050 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2056 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2057 { 2058 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2059 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2060 2061 PetscFunctionBegin; 2062 if (nghosts) { *nghosts = B->nbs;} 2063 if (ghosts) {*ghosts = baij->garray;} 2064 PetscFunctionReturn(0); 2065 } 2066 2067 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2071 /* 2072 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2073 */ 2074 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2075 { 2076 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2077 PetscErrorCode ierr; 2078 PetscMPIInt size,*ncolsonproc,*disp,nn; 2079 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2080 const PetscInt *is; 2081 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2082 PetscInt *rowhit,M,cstart,cend,colb; 2083 PetscInt *columnsforrow,l; 2084 IS *isa; 2085 PetscTruth done,flg; 2086 ISLocalToGlobalMapping map = mat->bmapping; 2087 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2088 2089 PetscFunctionBegin; 2090 if (!mat->assembled) { 2091 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2092 } 2093 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2094 2095 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2096 2097 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2098 M = mat->rmap->n/bs; 2099 cstart = mat->cmap->rstart/bs; 2100 cend = mat->cmap->rend/bs; 2101 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2102 c->N = mat->cmap->N/bs; 2103 c->m = mat->rmap->n/bs; 2104 c->rstart = mat->rmap->rstart/bs; 2105 2106 c->ncolors = nis; 2107 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2108 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2109 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2110 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2111 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2112 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2113 2114 /* Allow access to data structures of local part of matrix */ 2115 if (!baij->colmap) { 2116 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2117 } 2118 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2119 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2120 2121 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2122 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2123 2124 for (i=0; i<nis; i++) { 2125 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2126 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2127 c->ncolumns[i] = n; 2128 if (n) { 2129 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2130 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2131 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2132 } else { 2133 c->columns[i] = 0; 2134 } 2135 2136 if (ctype == IS_COLORING_GLOBAL){ 2137 /* Determine the total (parallel) number of columns of this color */ 2138 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2139 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2140 2141 nn = PetscMPIIntCast(n); 2142 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2143 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2144 if (!nctot) { 2145 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2146 } 2147 2148 disp[0] = 0; 2149 for (j=1; j<size; j++) { 2150 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2151 } 2152 2153 /* Get complete list of columns for color on each processor */ 2154 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2155 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2156 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2157 } else if (ctype == IS_COLORING_GHOSTED){ 2158 /* Determine local number of columns of this color on this process, including ghost points */ 2159 nctot = n; 2160 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2161 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2162 } else { 2163 SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2164 } 2165 2166 /* 2167 Mark all rows affect by these columns 2168 */ 2169 /* Temporary option to allow for debugging/testing */ 2170 flg = PETSC_FALSE; 2171 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2172 if (!flg) {/*-----------------------------------------------------------------------------*/ 2173 /* crude, fast version */ 2174 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2175 /* loop over columns*/ 2176 for (j=0; j<nctot; j++) { 2177 if (ctype == IS_COLORING_GHOSTED) { 2178 col = ltog[cols[j]]; 2179 } else { 2180 col = cols[j]; 2181 } 2182 if (col >= cstart && col < cend) { 2183 /* column is in diagonal block of matrix */ 2184 rows = A_cj + A_ci[col-cstart]; 2185 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2186 } else { 2187 #if defined (PETSC_USE_CTABLE) 2188 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr) 2189 colb --; 2190 #else 2191 colb = baij->colmap[col] - 1; 2192 #endif 2193 if (colb == -1) { 2194 m = 0; 2195 } else { 2196 colb = colb/bs; 2197 rows = B_cj + B_ci[colb]; 2198 m = B_ci[colb+1] - B_ci[colb]; 2199 } 2200 } 2201 /* loop over columns marking them in rowhit */ 2202 for (k=0; k<m; k++) { 2203 rowhit[*rows++] = col + 1; 2204 } 2205 } 2206 2207 /* count the number of hits */ 2208 nrows = 0; 2209 for (j=0; j<M; j++) { 2210 if (rowhit[j]) nrows++; 2211 } 2212 c->nrows[i] = nrows; 2213 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2214 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2215 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2216 nrows = 0; 2217 for (j=0; j<M; j++) { 2218 if (rowhit[j]) { 2219 c->rows[i][nrows] = j; 2220 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2221 nrows++; 2222 } 2223 } 2224 } else {/*-------------------------------------------------------------------------------*/ 2225 /* slow version, using rowhit as a linked list */ 2226 PetscInt currentcol,fm,mfm; 2227 rowhit[M] = M; 2228 nrows = 0; 2229 /* loop over columns*/ 2230 for (j=0; j<nctot; j++) { 2231 if (ctype == IS_COLORING_GHOSTED) { 2232 col = ltog[cols[j]]; 2233 } else { 2234 col = cols[j]; 2235 } 2236 if (col >= cstart && col < cend) { 2237 /* column is in diagonal block of matrix */ 2238 rows = A_cj + A_ci[col-cstart]; 2239 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2240 } else { 2241 #if defined (PETSC_USE_CTABLE) 2242 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2243 colb --; 2244 #else 2245 colb = baij->colmap[col] - 1; 2246 #endif 2247 if (colb == -1) { 2248 m = 0; 2249 } else { 2250 colb = colb/bs; 2251 rows = B_cj + B_ci[colb]; 2252 m = B_ci[colb+1] - B_ci[colb]; 2253 } 2254 } 2255 2256 /* loop over columns marking them in rowhit */ 2257 fm = M; /* fm points to first entry in linked list */ 2258 for (k=0; k<m; k++) { 2259 currentcol = *rows++; 2260 /* is it already in the list? */ 2261 do { 2262 mfm = fm; 2263 fm = rowhit[fm]; 2264 } while (fm < currentcol); 2265 /* not in list so add it */ 2266 if (fm != currentcol) { 2267 nrows++; 2268 columnsforrow[currentcol] = col; 2269 /* next three lines insert new entry into linked list */ 2270 rowhit[mfm] = currentcol; 2271 rowhit[currentcol] = fm; 2272 fm = currentcol; 2273 /* fm points to present position in list since we know the columns are sorted */ 2274 } else { 2275 SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2276 } 2277 } 2278 } 2279 c->nrows[i] = nrows; 2280 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2281 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2282 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2283 /* now store the linked list of rows into c->rows[i] */ 2284 nrows = 0; 2285 fm = rowhit[M]; 2286 do { 2287 c->rows[i][nrows] = fm; 2288 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2289 fm = rowhit[fm]; 2290 } while (fm < M); 2291 } /* ---------------------------------------------------------------------------------------*/ 2292 ierr = PetscFree(cols);CHKERRQ(ierr); 2293 } 2294 2295 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2296 /* 2297 vscale will contain the "diagonal" on processor scalings followed by the off processor 2298 */ 2299 if (ctype == IS_COLORING_GLOBAL) { 2300 PetscInt *garray; 2301 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2302 for (i=0; i<baij->B->cmap->n/bs; i++) { 2303 for (j=0; j<bs; j++) { 2304 garray[i*bs+j] = bs*baij->garray[i]+j; 2305 } 2306 } 2307 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2308 ierr = PetscFree(garray);CHKERRQ(ierr); 2309 CHKMEMQ; 2310 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2311 for (k=0; k<c->ncolors; k++) { 2312 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2313 for (l=0; l<c->nrows[k]; l++) { 2314 col = c->columnsforrow[k][l]; 2315 if (col >= cstart && col < cend) { 2316 /* column is in diagonal block of matrix */ 2317 colb = col - cstart; 2318 } else { 2319 /* column is in "off-processor" part */ 2320 #if defined (PETSC_USE_CTABLE) 2321 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2322 colb --; 2323 #else 2324 colb = baij->colmap[col] - 1; 2325 #endif 2326 colb = colb/bs; 2327 colb += cend - cstart; 2328 } 2329 c->vscaleforrow[k][l] = colb; 2330 } 2331 } 2332 } else if (ctype == IS_COLORING_GHOSTED) { 2333 /* Get gtol mapping */ 2334 PetscInt N = mat->cmap->N, *gtol; 2335 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2336 for (i=0; i<N; i++) gtol[i] = -1; 2337 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2338 2339 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2340 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2341 for (k=0; k<c->ncolors; k++) { 2342 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2343 for (l=0; l<c->nrows[k]; l++) { 2344 col = c->columnsforrow[k][l]; /* global column index */ 2345 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2346 } 2347 } 2348 ierr = PetscFree(gtol);CHKERRQ(ierr); 2349 } 2350 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2351 2352 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2353 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2354 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2355 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2356 CHKMEMQ; 2357 PetscFunctionReturn(0); 2358 } 2359 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2362 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2363 { 2364 Mat B; 2365 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2366 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2367 Mat_SeqAIJ *b; 2368 PetscErrorCode ierr; 2369 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2370 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2371 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2372 2373 PetscFunctionBegin; 2374 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2375 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2376 2377 /* ---------------------------------------------------------------- 2378 Tell every processor the number of nonzeros per row 2379 */ 2380 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2381 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2382 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2383 } 2384 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2385 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2386 displs = recvcounts + size; 2387 for (i=0; i<size; i++) { 2388 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2389 displs[i] = A->rmap->range[i]/bs; 2390 } 2391 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2392 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2393 #else 2394 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2395 #endif 2396 /* --------------------------------------------------------------- 2397 Create the sequential matrix of the same type as the local block diagonal 2398 */ 2399 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2400 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2401 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2402 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2403 b = (Mat_SeqAIJ *)B->data; 2404 2405 /*-------------------------------------------------------------------- 2406 Copy my part of matrix column indices over 2407 */ 2408 sendcount = ad->nz + bd->nz; 2409 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2410 a_jsendbuf = ad->j; 2411 b_jsendbuf = bd->j; 2412 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2413 cnt = 0; 2414 for (i=0; i<n; i++) { 2415 2416 /* put in lower diagonal portion */ 2417 m = bd->i[i+1] - bd->i[i]; 2418 while (m > 0) { 2419 /* is it above diagonal (in bd (compressed) numbering) */ 2420 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2421 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2422 m--; 2423 } 2424 2425 /* put in diagonal portion */ 2426 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2427 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2428 } 2429 2430 /* put in upper diagonal portion */ 2431 while (m-- > 0) { 2432 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2433 } 2434 } 2435 if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2436 2437 /*-------------------------------------------------------------------- 2438 Gather all column indices to all processors 2439 */ 2440 for (i=0; i<size; i++) { 2441 recvcounts[i] = 0; 2442 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2443 recvcounts[i] += lens[j]; 2444 } 2445 } 2446 displs[0] = 0; 2447 for (i=1; i<size; i++) { 2448 displs[i] = displs[i-1] + recvcounts[i-1]; 2449 } 2450 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2451 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2452 #else 2453 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2454 #endif 2455 /*-------------------------------------------------------------------- 2456 Assemble the matrix into useable form (note numerical values not yet set) 2457 */ 2458 /* set the b->ilen (length of each row) values */ 2459 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2460 /* set the b->i indices */ 2461 b->i[0] = 0; 2462 for (i=1; i<=A->rmap->N/bs; i++) { 2463 b->i[i] = b->i[i-1] + lens[i-1]; 2464 } 2465 ierr = PetscFree(lens);CHKERRQ(ierr); 2466 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2467 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2468 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2469 2470 if (A->symmetric){ 2471 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2472 } else if (A->hermitian) { 2473 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2474 } else if (A->structurally_symmetric) { 2475 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2476 } 2477 *newmat = B; 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatSOR_MPIBAIJ" 2483 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2484 { 2485 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2486 PetscErrorCode ierr; 2487 Vec bb1 = 0; 2488 2489 PetscFunctionBegin; 2490 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2491 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2492 } 2493 2494 if (flag == SOR_APPLY_UPPER) { 2495 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2500 if (flag & SOR_ZERO_INITIAL_GUESS) { 2501 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2502 its--; 2503 } 2504 2505 while (its--) { 2506 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2508 2509 /* update rhs: bb1 = bb - B*x */ 2510 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2511 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2512 2513 /* local sweep */ 2514 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2515 } 2516 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2517 if (flag & SOR_ZERO_INITIAL_GUESS) { 2518 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2519 its--; 2520 } 2521 while (its--) { 2522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2523 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2524 2525 /* update rhs: bb1 = bb - B*x */ 2526 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2527 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2528 2529 /* local sweep */ 2530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2531 } 2532 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2533 if (flag & SOR_ZERO_INITIAL_GUESS) { 2534 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2535 its--; 2536 } 2537 while (its--) { 2538 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2539 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2540 2541 /* update rhs: bb1 = bb - B*x */ 2542 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2543 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2544 2545 /* local sweep */ 2546 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2547 } 2548 } else { 2549 SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2550 } 2551 2552 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2553 PetscFunctionReturn(0); 2554 } 2555 2556 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2557 2558 2559 /* -------------------------------------------------------------------*/ 2560 static struct _MatOps MatOps_Values = { 2561 MatSetValues_MPIBAIJ, 2562 MatGetRow_MPIBAIJ, 2563 MatRestoreRow_MPIBAIJ, 2564 MatMult_MPIBAIJ, 2565 /* 4*/ MatMultAdd_MPIBAIJ, 2566 MatMultTranspose_MPIBAIJ, 2567 MatMultTransposeAdd_MPIBAIJ, 2568 0, 2569 0, 2570 0, 2571 /*10*/ 0, 2572 0, 2573 0, 2574 MatSOR_MPIBAIJ, 2575 MatTranspose_MPIBAIJ, 2576 /*15*/ MatGetInfo_MPIBAIJ, 2577 MatEqual_MPIBAIJ, 2578 MatGetDiagonal_MPIBAIJ, 2579 MatDiagonalScale_MPIBAIJ, 2580 MatNorm_MPIBAIJ, 2581 /*20*/ MatAssemblyBegin_MPIBAIJ, 2582 MatAssemblyEnd_MPIBAIJ, 2583 MatSetOption_MPIBAIJ, 2584 MatZeroEntries_MPIBAIJ, 2585 /*24*/ MatZeroRows_MPIBAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*34*/ MatDuplicate_MPIBAIJ, 2596 0, 2597 0, 2598 0, 2599 0, 2600 /*39*/ MatAXPY_MPIBAIJ, 2601 MatGetSubMatrices_MPIBAIJ, 2602 MatIncreaseOverlap_MPIBAIJ, 2603 MatGetValues_MPIBAIJ, 2604 MatCopy_MPIBAIJ, 2605 /*44*/ 0, 2606 MatScale_MPIBAIJ, 2607 0, 2608 0, 2609 0, 2610 /*49*/ MatSetBlockSize_MPIBAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*54*/ MatFDColoringCreate_MPIBAIJ, 2616 0, 2617 MatSetUnfactored_MPIBAIJ, 2618 MatPermute_MPIBAIJ, 2619 MatSetValuesBlocked_MPIBAIJ, 2620 /*59*/ MatGetSubMatrix_MPIBAIJ, 2621 MatDestroy_MPIBAIJ, 2622 MatView_MPIBAIJ, 2623 0, 2624 0, 2625 /*64*/ 0, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*74*/ 0, 2636 MatFDColoringApply_BAIJ, 2637 0, 2638 0, 2639 0, 2640 /*79*/ 0, 2641 0, 2642 0, 2643 0, 2644 MatLoad_MPIBAIJ, 2645 /*84*/ 0, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*89*/ 0, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*94*/ 0, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*99*/ 0, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*104*/0, 2666 MatRealPart_MPIBAIJ, 2667 MatImaginaryPart_MPIBAIJ, 2668 0, 2669 0, 2670 /*109*/0, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2676 0, 2677 MatGetGhosts_MPIBAIJ 2678 }; 2679 2680 EXTERN_C_BEGIN 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2684 { 2685 PetscFunctionBegin; 2686 *a = ((Mat_MPIBAIJ *)A->data)->A; 2687 *iscopy = PETSC_FALSE; 2688 PetscFunctionReturn(0); 2689 } 2690 EXTERN_C_END 2691 2692 EXTERN_C_BEGIN 2693 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2694 EXTERN_C_END 2695 2696 EXTERN_C_BEGIN 2697 #undef __FUNCT__ 2698 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2699 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2700 { 2701 PetscInt m,rstart,cstart,cend; 2702 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2703 const PetscInt *JJ=0; 2704 PetscScalar *values=0; 2705 PetscErrorCode ierr; 2706 2707 PetscFunctionBegin; 2708 2709 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2710 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2711 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2712 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2713 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2714 m = B->rmap->n/bs; 2715 rstart = B->rmap->rstart/bs; 2716 cstart = B->cmap->rstart/bs; 2717 cend = B->cmap->rend/bs; 2718 2719 if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2720 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2721 for (i=0; i<m; i++) { 2722 nz = ii[i+1] - ii[i]; 2723 if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2724 nz_max = PetscMax(nz_max,nz); 2725 JJ = jj + ii[i]; 2726 for (j=0; j<nz; j++) { 2727 if (*JJ >= cstart) break; 2728 JJ++; 2729 } 2730 d = 0; 2731 for (; j<nz; j++) { 2732 if (*JJ++ >= cend) break; 2733 d++; 2734 } 2735 d_nnz[i] = d; 2736 o_nnz[i] = nz - d; 2737 } 2738 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2739 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2740 2741 values = (PetscScalar*)V; 2742 if (!values) { 2743 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2744 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2745 } 2746 for (i=0; i<m; i++) { 2747 PetscInt row = i + rstart; 2748 PetscInt ncols = ii[i+1] - ii[i]; 2749 const PetscInt *icols = jj + ii[i]; 2750 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2751 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2752 } 2753 2754 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2755 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2757 2758 PetscFunctionReturn(0); 2759 } 2760 EXTERN_C_END 2761 2762 #undef __FUNCT__ 2763 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2764 /*@C 2765 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2766 (the default parallel PETSc format). 2767 2768 Collective on MPI_Comm 2769 2770 Input Parameters: 2771 + A - the matrix 2772 . i - the indices into j for the start of each local row (starts with zero) 2773 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2774 - v - optional values in the matrix 2775 2776 Level: developer 2777 2778 .keywords: matrix, aij, compressed row, sparse, parallel 2779 2780 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2781 @*/ 2782 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2783 { 2784 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2785 2786 PetscFunctionBegin; 2787 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2788 if (f) { 2789 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2790 } 2791 PetscFunctionReturn(0); 2792 } 2793 2794 EXTERN_C_BEGIN 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2797 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2798 { 2799 Mat_MPIBAIJ *b; 2800 PetscErrorCode ierr; 2801 PetscInt i, newbs = PetscAbs(bs); 2802 2803 PetscFunctionBegin; 2804 if (bs < 0) { 2805 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2806 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2807 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2808 bs = PetscAbs(bs); 2809 } 2810 if ((d_nnz || o_nnz) && newbs != bs) { 2811 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2812 } 2813 bs = newbs; 2814 2815 2816 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2817 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2818 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2819 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2820 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2821 2822 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2823 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2824 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2825 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2826 2827 if (d_nnz) { 2828 for (i=0; i<B->rmap->n/bs; i++) { 2829 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2830 } 2831 } 2832 if (o_nnz) { 2833 for (i=0; i<B->rmap->n/bs; i++) { 2834 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2835 } 2836 } 2837 2838 b = (Mat_MPIBAIJ*)B->data; 2839 b->bs2 = bs*bs; 2840 b->mbs = B->rmap->n/bs; 2841 b->nbs = B->cmap->n/bs; 2842 b->Mbs = B->rmap->N/bs; 2843 b->Nbs = B->cmap->N/bs; 2844 2845 for (i=0; i<=b->size; i++) { 2846 b->rangebs[i] = B->rmap->range[i]/bs; 2847 } 2848 b->rstartbs = B->rmap->rstart/bs; 2849 b->rendbs = B->rmap->rend/bs; 2850 b->cstartbs = B->cmap->rstart/bs; 2851 b->cendbs = B->cmap->rend/bs; 2852 2853 if (!B->preallocated) { 2854 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2855 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2856 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2858 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2859 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2860 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2862 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2863 } 2864 2865 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2866 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2867 B->preallocated = PETSC_TRUE; 2868 PetscFunctionReturn(0); 2869 } 2870 EXTERN_C_END 2871 2872 EXTERN_C_BEGIN 2873 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2874 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2875 EXTERN_C_END 2876 2877 2878 EXTERN_C_BEGIN 2879 #undef __FUNCT__ 2880 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2881 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2882 { 2883 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2884 PetscErrorCode ierr; 2885 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2886 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2887 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2888 2889 PetscFunctionBegin; 2890 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2891 ii[0] = 0; 2892 CHKMEMQ; 2893 for (i=0; i<M; i++) { 2894 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2895 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2896 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2897 /* remove one from count of matrix has diagonal */ 2898 for (j=id[i]; j<id[i+1]; j++) { 2899 if (jd[j] == i) {ii[i+1]--;break;} 2900 } 2901 CHKMEMQ; 2902 } 2903 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2904 cnt = 0; 2905 for (i=0; i<M; i++) { 2906 for (j=io[i]; j<io[i+1]; j++) { 2907 if (garray[jo[j]] > rstart) break; 2908 jj[cnt++] = garray[jo[j]]; 2909 CHKMEMQ; 2910 } 2911 for (k=id[i]; k<id[i+1]; k++) { 2912 if (jd[k] != i) { 2913 jj[cnt++] = rstart + jd[k]; 2914 CHKMEMQ; 2915 } 2916 } 2917 for (;j<io[i+1]; j++) { 2918 jj[cnt++] = garray[jo[j]]; 2919 CHKMEMQ; 2920 } 2921 } 2922 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 EXTERN_C_END 2926 2927 EXTERN_C_BEGIN 2928 #if defined(PETSC_HAVE_MUMPS) 2929 extern PetscErrorCode MatGetFactor_mpibaij_mumps(Mat,MatFactorType,Mat*); 2930 #endif 2931 EXTERN_C_END 2932 2933 /*MC 2934 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2935 2936 Options Database Keys: 2937 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2938 . -mat_block_size <bs> - set the blocksize used to store the matrix 2939 - -mat_use_hash_table <fact> 2940 2941 Level: beginner 2942 2943 .seealso: MatCreateMPIBAIJ 2944 M*/ 2945 2946 EXTERN_C_BEGIN 2947 #undef __FUNCT__ 2948 #define __FUNCT__ "MatCreate_MPIBAIJ" 2949 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2950 { 2951 Mat_MPIBAIJ *b; 2952 PetscErrorCode ierr; 2953 PetscTruth flg; 2954 2955 PetscFunctionBegin; 2956 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2957 B->data = (void*)b; 2958 2959 2960 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2961 B->mapping = 0; 2962 B->assembled = PETSC_FALSE; 2963 2964 B->insertmode = NOT_SET_VALUES; 2965 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2966 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2967 2968 /* build local table of row and column ownerships */ 2969 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2970 2971 /* build cache for off array entries formed */ 2972 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2973 b->donotstash = PETSC_FALSE; 2974 b->colmap = PETSC_NULL; 2975 b->garray = PETSC_NULL; 2976 b->roworiented = PETSC_TRUE; 2977 2978 /* stuff used in block assembly */ 2979 b->barray = 0; 2980 2981 /* stuff used for matrix vector multiply */ 2982 b->lvec = 0; 2983 b->Mvctx = 0; 2984 2985 /* stuff for MatGetRow() */ 2986 b->rowindices = 0; 2987 b->rowvalues = 0; 2988 b->getrowactive = PETSC_FALSE; 2989 2990 /* hash table stuff */ 2991 b->ht = 0; 2992 b->hd = 0; 2993 b->ht_size = 0; 2994 b->ht_flag = PETSC_FALSE; 2995 b->ht_fact = 0; 2996 b->ht_total_ct = 0; 2997 b->ht_insert_ct = 0; 2998 2999 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3000 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3001 if (flg) { 3002 PetscReal fact = 1.39; 3003 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3004 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3005 if (fact <= 1.0) fact = 1.39; 3006 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3007 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3008 } 3009 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3010 3011 #if defined(PETSC_HAVE_MUMPS) 3012 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_mpibaij_mumps",MatGetFactor_mpibaij_mumps);CHKERRQ(ierr); 3013 #endif 3014 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3015 "MatConvert_MPIBAIJ_MPIAdj", 3016 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3017 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3018 "MatStoreValues_MPIBAIJ", 3019 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3020 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3021 "MatRetrieveValues_MPIBAIJ", 3022 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3023 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3024 "MatGetDiagonalBlock_MPIBAIJ", 3025 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3026 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3027 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3028 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3029 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3030 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3031 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3032 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3033 "MatDiagonalScaleLocal_MPIBAIJ", 3034 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3035 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3036 "MatSetHashTableFactor_MPIBAIJ", 3037 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3038 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3039 PetscFunctionReturn(0); 3040 } 3041 EXTERN_C_END 3042 3043 /*MC 3044 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3045 3046 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3047 and MATMPIBAIJ otherwise. 3048 3049 Options Database Keys: 3050 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3051 3052 Level: beginner 3053 3054 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3055 M*/ 3056 3057 EXTERN_C_BEGIN 3058 #undef __FUNCT__ 3059 #define __FUNCT__ "MatCreate_BAIJ" 3060 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3061 { 3062 PetscErrorCode ierr; 3063 PetscMPIInt size; 3064 3065 PetscFunctionBegin; 3066 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3067 if (size == 1) { 3068 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3069 } else { 3070 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3071 } 3072 PetscFunctionReturn(0); 3073 } 3074 EXTERN_C_END 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3078 /*@C 3079 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3080 (block compressed row). For good matrix assembly performance 3081 the user should preallocate the matrix storage by setting the parameters 3082 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3083 performance can be increased by more than a factor of 50. 3084 3085 Collective on Mat 3086 3087 Input Parameters: 3088 + A - the matrix 3089 . bs - size of blockk 3090 . d_nz - number of block nonzeros per block row in diagonal portion of local 3091 submatrix (same for all local rows) 3092 . d_nnz - array containing the number of block nonzeros in the various block rows 3093 of the in diagonal portion of the local (possibly different for each block 3094 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3095 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3096 submatrix (same for all local rows). 3097 - o_nnz - array containing the number of nonzeros in the various block rows of the 3098 off-diagonal portion of the local submatrix (possibly different for 3099 each block row) or PETSC_NULL. 3100 3101 If the *_nnz parameter is given then the *_nz parameter is ignored 3102 3103 Options Database Keys: 3104 + -mat_block_size - size of the blocks to use 3105 - -mat_use_hash_table <fact> 3106 3107 Notes: 3108 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3109 than it must be used on all processors that share the object for that argument. 3110 3111 Storage Information: 3112 For a square global matrix we define each processor's diagonal portion 3113 to be its local rows and the corresponding columns (a square submatrix); 3114 each processor's off-diagonal portion encompasses the remainder of the 3115 local matrix (a rectangular submatrix). 3116 3117 The user can specify preallocated storage for the diagonal part of 3118 the local submatrix with either d_nz or d_nnz (not both). Set 3119 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3120 memory allocation. Likewise, specify preallocated storage for the 3121 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3122 3123 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3124 the figure below we depict these three local rows and all columns (0-11). 3125 3126 .vb 3127 0 1 2 3 4 5 6 7 8 9 10 11 3128 ------------------- 3129 row 3 | o o o d d d o o o o o o 3130 row 4 | o o o d d d o o o o o o 3131 row 5 | o o o d d d o o o o o o 3132 ------------------- 3133 .ve 3134 3135 Thus, any entries in the d locations are stored in the d (diagonal) 3136 submatrix, and any entries in the o locations are stored in the 3137 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3138 stored simply in the MATSEQBAIJ format for compressed row storage. 3139 3140 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3141 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3142 In general, for PDE problems in which most nonzeros are near the diagonal, 3143 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3144 or you will get TERRIBLE performance; see the users' manual chapter on 3145 matrices. 3146 3147 You can call MatGetInfo() to get information on how effective the preallocation was; 3148 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3149 You can also run with the option -info and look for messages with the string 3150 malloc in them to see if additional memory allocation was needed. 3151 3152 Level: intermediate 3153 3154 .keywords: matrix, block, aij, compressed row, sparse, parallel 3155 3156 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3157 @*/ 3158 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3159 { 3160 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3161 3162 PetscFunctionBegin; 3163 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3164 if (f) { 3165 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3166 } 3167 PetscFunctionReturn(0); 3168 } 3169 3170 #undef __FUNCT__ 3171 #define __FUNCT__ "MatCreateMPIBAIJ" 3172 /*@C 3173 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3174 (block compressed row). For good matrix assembly performance 3175 the user should preallocate the matrix storage by setting the parameters 3176 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3177 performance can be increased by more than a factor of 50. 3178 3179 Collective on MPI_Comm 3180 3181 Input Parameters: 3182 + comm - MPI communicator 3183 . bs - size of blockk 3184 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3185 This value should be the same as the local size used in creating the 3186 y vector for the matrix-vector product y = Ax. 3187 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3188 This value should be the same as the local size used in creating the 3189 x vector for the matrix-vector product y = Ax. 3190 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3191 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3192 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3193 submatrix (same for all local rows) 3194 . d_nnz - array containing the number of nonzero blocks in the various block rows 3195 of the in diagonal portion of the local (possibly different for each block 3196 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3197 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3198 submatrix (same for all local rows). 3199 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3200 off-diagonal portion of the local submatrix (possibly different for 3201 each block row) or PETSC_NULL. 3202 3203 Output Parameter: 3204 . A - the matrix 3205 3206 Options Database Keys: 3207 + -mat_block_size - size of the blocks to use 3208 - -mat_use_hash_table <fact> 3209 3210 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3211 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3212 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3213 3214 Notes: 3215 If the *_nnz parameter is given then the *_nz parameter is ignored 3216 3217 A nonzero block is any block that as 1 or more nonzeros in it 3218 3219 The user MUST specify either the local or global matrix dimensions 3220 (possibly both). 3221 3222 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3223 than it must be used on all processors that share the object for that argument. 3224 3225 Storage Information: 3226 For a square global matrix we define each processor's diagonal portion 3227 to be its local rows and the corresponding columns (a square submatrix); 3228 each processor's off-diagonal portion encompasses the remainder of the 3229 local matrix (a rectangular submatrix). 3230 3231 The user can specify preallocated storage for the diagonal part of 3232 the local submatrix with either d_nz or d_nnz (not both). Set 3233 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3234 memory allocation. Likewise, specify preallocated storage for the 3235 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3236 3237 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3238 the figure below we depict these three local rows and all columns (0-11). 3239 3240 .vb 3241 0 1 2 3 4 5 6 7 8 9 10 11 3242 ------------------- 3243 row 3 | o o o d d d o o o o o o 3244 row 4 | o o o d d d o o o o o o 3245 row 5 | o o o d d d o o o o o o 3246 ------------------- 3247 .ve 3248 3249 Thus, any entries in the d locations are stored in the d (diagonal) 3250 submatrix, and any entries in the o locations are stored in the 3251 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3252 stored simply in the MATSEQBAIJ format for compressed row storage. 3253 3254 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3255 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3256 In general, for PDE problems in which most nonzeros are near the diagonal, 3257 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3258 or you will get TERRIBLE performance; see the users' manual chapter on 3259 matrices. 3260 3261 Level: intermediate 3262 3263 .keywords: matrix, block, aij, compressed row, sparse, parallel 3264 3265 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3266 @*/ 3267 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3268 { 3269 PetscErrorCode ierr; 3270 PetscMPIInt size; 3271 3272 PetscFunctionBegin; 3273 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3274 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3275 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3276 if (size > 1) { 3277 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3278 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3279 } else { 3280 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3281 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3282 } 3283 PetscFunctionReturn(0); 3284 } 3285 3286 #undef __FUNCT__ 3287 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3288 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3289 { 3290 Mat mat; 3291 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3292 PetscErrorCode ierr; 3293 PetscInt len=0; 3294 3295 PetscFunctionBegin; 3296 *newmat = 0; 3297 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3298 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3299 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3300 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3301 3302 mat->factortype = matin->factortype; 3303 mat->preallocated = PETSC_TRUE; 3304 mat->assembled = PETSC_TRUE; 3305 mat->insertmode = NOT_SET_VALUES; 3306 3307 a = (Mat_MPIBAIJ*)mat->data; 3308 mat->rmap->bs = matin->rmap->bs; 3309 a->bs2 = oldmat->bs2; 3310 a->mbs = oldmat->mbs; 3311 a->nbs = oldmat->nbs; 3312 a->Mbs = oldmat->Mbs; 3313 a->Nbs = oldmat->Nbs; 3314 3315 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3316 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3317 3318 a->size = oldmat->size; 3319 a->rank = oldmat->rank; 3320 a->donotstash = oldmat->donotstash; 3321 a->roworiented = oldmat->roworiented; 3322 a->rowindices = 0; 3323 a->rowvalues = 0; 3324 a->getrowactive = PETSC_FALSE; 3325 a->barray = 0; 3326 a->rstartbs = oldmat->rstartbs; 3327 a->rendbs = oldmat->rendbs; 3328 a->cstartbs = oldmat->cstartbs; 3329 a->cendbs = oldmat->cendbs; 3330 3331 /* hash table stuff */ 3332 a->ht = 0; 3333 a->hd = 0; 3334 a->ht_size = 0; 3335 a->ht_flag = oldmat->ht_flag; 3336 a->ht_fact = oldmat->ht_fact; 3337 a->ht_total_ct = 0; 3338 a->ht_insert_ct = 0; 3339 3340 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3341 if (oldmat->colmap) { 3342 #if defined (PETSC_USE_CTABLE) 3343 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3344 #else 3345 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3346 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3347 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3348 #endif 3349 } else a->colmap = 0; 3350 3351 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3352 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3353 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3354 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3355 } else a->garray = 0; 3356 3357 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3358 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3359 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3360 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3361 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3362 3363 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3364 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3365 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3366 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3367 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3368 *newmat = mat; 3369 3370 PetscFunctionReturn(0); 3371 } 3372 3373 #undef __FUNCT__ 3374 #define __FUNCT__ "MatLoad_MPIBAIJ" 3375 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 3376 { 3377 Mat A; 3378 PetscErrorCode ierr; 3379 int fd; 3380 PetscInt i,nz,j,rstart,rend; 3381 PetscScalar *vals,*buf; 3382 MPI_Comm comm = ((PetscObject)viewer)->comm; 3383 MPI_Status status; 3384 PetscMPIInt rank,size,maxnz; 3385 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3386 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3387 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3388 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3389 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3390 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3391 3392 PetscFunctionBegin; 3393 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3394 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3395 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3396 3397 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3398 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3399 if (!rank) { 3400 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3401 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3402 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3403 } 3404 3405 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3406 M = header[1]; N = header[2]; 3407 3408 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3409 3410 /* 3411 This code adds extra rows to make sure the number of rows is 3412 divisible by the blocksize 3413 */ 3414 Mbs = M/bs; 3415 extra_rows = bs - M + bs*Mbs; 3416 if (extra_rows == bs) extra_rows = 0; 3417 else Mbs++; 3418 if (extra_rows && !rank) { 3419 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3420 } 3421 3422 /* determine ownership of all rows */ 3423 mbs = Mbs/size + ((Mbs % size) > rank); 3424 m = mbs*bs; 3425 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3426 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3427 3428 /* process 0 needs enough room for process with most rows */ 3429 if (!rank) { 3430 mmax = rowners[1]; 3431 for (i=2; i<size; i++) { 3432 mmax = PetscMax(mmax,rowners[i]); 3433 } 3434 mmax*=bs; 3435 } else mmax = m; 3436 3437 rowners[0] = 0; 3438 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3439 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3440 rstart = rowners[rank]; 3441 rend = rowners[rank+1]; 3442 3443 /* distribute row lengths to all processors */ 3444 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3445 if (!rank) { 3446 mend = m; 3447 if (size == 1) mend = mend - extra_rows; 3448 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3449 for (j=mend; j<m; j++) locrowlens[j] = 1; 3450 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3451 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3452 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3453 for (j=0; j<m; j++) { 3454 procsnz[0] += locrowlens[j]; 3455 } 3456 for (i=1; i<size; i++) { 3457 mend = browners[i+1] - browners[i]; 3458 if (i == size-1) mend = mend - extra_rows; 3459 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3460 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3461 /* calculate the number of nonzeros on each processor */ 3462 for (j=0; j<browners[i+1]-browners[i]; j++) { 3463 procsnz[i] += rowlengths[j]; 3464 } 3465 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3466 } 3467 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3468 } else { 3469 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3470 } 3471 3472 if (!rank) { 3473 /* determine max buffer needed and allocate it */ 3474 maxnz = procsnz[0]; 3475 for (i=1; i<size; i++) { 3476 maxnz = PetscMax(maxnz,procsnz[i]); 3477 } 3478 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3479 3480 /* read in my part of the matrix column indices */ 3481 nz = procsnz[0]; 3482 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3483 mycols = ibuf; 3484 if (size == 1) nz -= extra_rows; 3485 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3486 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3487 3488 /* read in every ones (except the last) and ship off */ 3489 for (i=1; i<size-1; i++) { 3490 nz = procsnz[i]; 3491 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3492 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3493 } 3494 /* read in the stuff for the last proc */ 3495 if (size != 1) { 3496 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3497 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3498 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3499 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3500 } 3501 ierr = PetscFree(cols);CHKERRQ(ierr); 3502 } else { 3503 /* determine buffer space needed for message */ 3504 nz = 0; 3505 for (i=0; i<m; i++) { 3506 nz += locrowlens[i]; 3507 } 3508 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3509 mycols = ibuf; 3510 /* receive message of column indices*/ 3511 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3512 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3513 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3514 } 3515 3516 /* loop over local rows, determining number of off diagonal entries */ 3517 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3518 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3519 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3520 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3521 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3522 rowcount = 0; nzcount = 0; 3523 for (i=0; i<mbs; i++) { 3524 dcount = 0; 3525 odcount = 0; 3526 for (j=0; j<bs; j++) { 3527 kmax = locrowlens[rowcount]; 3528 for (k=0; k<kmax; k++) { 3529 tmp = mycols[nzcount++]/bs; 3530 if (!mask[tmp]) { 3531 mask[tmp] = 1; 3532 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3533 else masked1[dcount++] = tmp; 3534 } 3535 } 3536 rowcount++; 3537 } 3538 3539 dlens[i] = dcount; 3540 odlens[i] = odcount; 3541 3542 /* zero out the mask elements we set */ 3543 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3544 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3545 } 3546 3547 /* create our matrix */ 3548 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3549 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3550 ierr = MatSetType(A,type);CHKERRQ(ierr) 3551 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3552 3553 if (!rank) { 3554 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3555 /* read in my part of the matrix numerical values */ 3556 nz = procsnz[0]; 3557 vals = buf; 3558 mycols = ibuf; 3559 if (size == 1) nz -= extra_rows; 3560 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3561 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3562 3563 /* insert into matrix */ 3564 jj = rstart*bs; 3565 for (i=0; i<m; i++) { 3566 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3567 mycols += locrowlens[i]; 3568 vals += locrowlens[i]; 3569 jj++; 3570 } 3571 /* read in other processors (except the last one) and ship out */ 3572 for (i=1; i<size-1; i++) { 3573 nz = procsnz[i]; 3574 vals = buf; 3575 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3576 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3577 } 3578 /* the last proc */ 3579 if (size != 1){ 3580 nz = procsnz[i] - extra_rows; 3581 vals = buf; 3582 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3583 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3584 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3585 } 3586 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3587 } else { 3588 /* receive numeric values */ 3589 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3590 3591 /* receive message of values*/ 3592 vals = buf; 3593 mycols = ibuf; 3594 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3595 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3596 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3597 3598 /* insert into matrix */ 3599 jj = rstart*bs; 3600 for (i=0; i<m; i++) { 3601 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3602 mycols += locrowlens[i]; 3603 vals += locrowlens[i]; 3604 jj++; 3605 } 3606 } 3607 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3608 ierr = PetscFree(buf);CHKERRQ(ierr); 3609 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3610 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3611 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3612 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3613 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3614 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3615 3616 *newmat = A; 3617 PetscFunctionReturn(0); 3618 } 3619 3620 #undef __FUNCT__ 3621 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3622 /*@ 3623 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3624 3625 Input Parameters: 3626 . mat - the matrix 3627 . fact - factor 3628 3629 Collective on Mat 3630 3631 Level: advanced 3632 3633 Notes: 3634 This can also be set by the command line option: -mat_use_hash_table <fact> 3635 3636 .keywords: matrix, hashtable, factor, HT 3637 3638 .seealso: MatSetOption() 3639 @*/ 3640 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3641 { 3642 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3643 3644 PetscFunctionBegin; 3645 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3646 if (f) { 3647 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3648 } 3649 PetscFunctionReturn(0); 3650 } 3651 3652 EXTERN_C_BEGIN 3653 #undef __FUNCT__ 3654 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3655 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3656 { 3657 Mat_MPIBAIJ *baij; 3658 3659 PetscFunctionBegin; 3660 baij = (Mat_MPIBAIJ*)mat->data; 3661 baij->ht_fact = fact; 3662 PetscFunctionReturn(0); 3663 } 3664 EXTERN_C_END 3665 3666 #undef __FUNCT__ 3667 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3668 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3669 { 3670 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3671 PetscFunctionBegin; 3672 *Ad = a->A; 3673 *Ao = a->B; 3674 *colmap = a->garray; 3675 PetscFunctionReturn(0); 3676 } 3677 3678 /* 3679 Special version for direct calls from Fortran (to eliminate two function call overheads 3680 */ 3681 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3682 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3683 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3684 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3685 #endif 3686 3687 #undef __FUNCT__ 3688 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3689 /*@C 3690 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3691 3692 Collective on Mat 3693 3694 Input Parameters: 3695 + mat - the matrix 3696 . min - number of input rows 3697 . im - input rows 3698 . nin - number of input columns 3699 . in - input columns 3700 . v - numerical values input 3701 - addvin - INSERT_VALUES or ADD_VALUES 3702 3703 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3704 3705 Level: advanced 3706 3707 .seealso: MatSetValuesBlocked() 3708 @*/ 3709 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3710 { 3711 /* convert input arguments to C version */ 3712 Mat mat = *matin; 3713 PetscInt m = *min, n = *nin; 3714 InsertMode addv = *addvin; 3715 3716 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3717 const MatScalar *value; 3718 MatScalar *barray=baij->barray; 3719 PetscTruth roworiented = baij->roworiented; 3720 PetscErrorCode ierr; 3721 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3722 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3723 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3724 3725 PetscFunctionBegin; 3726 /* tasks normally handled by MatSetValuesBlocked() */ 3727 if (mat->insertmode == NOT_SET_VALUES) { 3728 mat->insertmode = addv; 3729 } 3730 #if defined(PETSC_USE_DEBUG) 3731 else if (mat->insertmode != addv) { 3732 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3733 } 3734 if (mat->factortype) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3735 #endif 3736 if (mat->assembled) { 3737 mat->was_assembled = PETSC_TRUE; 3738 mat->assembled = PETSC_FALSE; 3739 } 3740 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3741 3742 3743 if(!barray) { 3744 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3745 baij->barray = barray; 3746 } 3747 3748 if (roworiented) { 3749 stepval = (n-1)*bs; 3750 } else { 3751 stepval = (m-1)*bs; 3752 } 3753 for (i=0; i<m; i++) { 3754 if (im[i] < 0) continue; 3755 #if defined(PETSC_USE_DEBUG) 3756 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3757 #endif 3758 if (im[i] >= rstart && im[i] < rend) { 3759 row = im[i] - rstart; 3760 for (j=0; j<n; j++) { 3761 /* If NumCol = 1 then a copy is not required */ 3762 if ((roworiented) && (n == 1)) { 3763 barray = (MatScalar*)v + i*bs2; 3764 } else if((!roworiented) && (m == 1)) { 3765 barray = (MatScalar*)v + j*bs2; 3766 } else { /* Here a copy is required */ 3767 if (roworiented) { 3768 value = v + i*(stepval+bs)*bs + j*bs; 3769 } else { 3770 value = v + j*(stepval+bs)*bs + i*bs; 3771 } 3772 for (ii=0; ii<bs; ii++,value+=stepval) { 3773 for (jj=0; jj<bs; jj++) { 3774 *barray++ = *value++; 3775 } 3776 } 3777 barray -=bs2; 3778 } 3779 3780 if (in[j] >= cstart && in[j] < cend){ 3781 col = in[j] - cstart; 3782 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3783 } 3784 else if (in[j] < 0) continue; 3785 #if defined(PETSC_USE_DEBUG) 3786 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 3787 #endif 3788 else { 3789 if (mat->was_assembled) { 3790 if (!baij->colmap) { 3791 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3792 } 3793 3794 #if defined(PETSC_USE_DEBUG) 3795 #if defined (PETSC_USE_CTABLE) 3796 { PetscInt data; 3797 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3798 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3799 } 3800 #else 3801 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3802 #endif 3803 #endif 3804 #if defined (PETSC_USE_CTABLE) 3805 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3806 col = (col - 1)/bs; 3807 #else 3808 col = (baij->colmap[in[j]] - 1)/bs; 3809 #endif 3810 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3811 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3812 col = in[j]; 3813 } 3814 } 3815 else col = in[j]; 3816 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3817 } 3818 } 3819 } else { 3820 if (!baij->donotstash) { 3821 if (roworiented) { 3822 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3823 } else { 3824 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3825 } 3826 } 3827 } 3828 } 3829 3830 /* task normally handled by MatSetValuesBlocked() */ 3831 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3832 PetscFunctionReturn(0); 3833 } 3834