1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 
85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define CHUNKSIZE 10 111 112 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 113 { \ 114 \ 115 brow = row/bs; \ 116 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 117 rmax = aimax[brow]; nrow = ailen[brow]; \ 118 bcol = col/bs; \ 119 ridx = row % bs; cidx = col % bs; \ 120 low = 0; high = nrow; \ 121 while (high-low > 3) { \ 122 t = (low+high)/2; \ 123 if (rp[t] > bcol) high = t; \ 124 else low = t; \ 125 } \ 126 for (_i=low; _i<high; _i++) { \ 127 if (rp[_i] > bcol) break; \ 128 if (rp[_i] == bcol) { \ 129 bap = ap + bs2*_i + bs*cidx + ridx; \ 130 if (addv == ADD_VALUES) *bap += value; \ 131 else *bap = value; \ 132 goto a_noinsert; \ 133 } \ 134 } \ 135 if (a->nonew == 1) goto a_noinsert; \ 136 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 137 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 138 N = nrow++ - 1; \ 139 /* shift up all the later entries in this row */ \ 140 for (ii=N; ii>=_i; ii--) { \ 141 rp[ii+1] = rp[ii]; \ 142 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 143 } \ 144 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 145 rp[_i] = bcol; \ 146 ap[bs2*_i + bs*cidx + ridx] = value; \ 147 a_noinsert:; \ 148 ailen[brow] = nrow; \ 149 } 150 151 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 152 { \ 153 brow = row/bs; \ 154 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 155 rmax = bimax[brow]; nrow = bilen[brow]; \ 156 bcol = col/bs; \ 157 ridx = row % bs; cidx = col % bs; \ 158 low = 0; high = nrow; \ 159 while (high-low > 3) { \ 160 t = (low+high)/2; \ 161 if (rp[t] > bcol) high = t; \ 162 else low = t; \ 163 } \ 164 for (_i=low; _i<high; _i++) { \ 165 if (rp[_i] > bcol) break; \ 166 if (rp[_i] == bcol) { \ 167 bap = ap + bs2*_i + bs*cidx + ridx; \ 168 if (addv == ADD_VALUES) *bap += value; \ 169 else *bap = value; \ 170 goto b_noinsert; \ 171 } \ 172 } \ 173 if (b->nonew == 1) goto b_noinsert; \ 174 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 175 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 176 CHKMEMQ;\ 177 N = nrow++ - 1; \ 178 /* shift up all the later entries in this row */ \ 179 for (ii=N; ii>=_i; ii--) { \ 180 rp[ii+1] = rp[ii]; \ 181 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 182 } \ 183 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 184 rp[_i] = bcol; \ 
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij =
(Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray=baij->barray; 283 PetscTruth roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if(!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) { 296 stepval = (n-1)*bs; 297 } else { 298 stepval = (m-1)*bs; 299 } 300 for (i=0; i<m; i++) { 301 if (im[i] < 0) continue; 302 #if defined(PETSC_USE_DEBUG) 303 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 304 #endif 305 if (im[i] >= rstart && im[i] < rend) { 306 row = im[i] - rstart; 307 for (j=0; j<n; j++) { 308 /* If NumCol = 1 then a copy is not required */ 309 if ((roworiented) && (n == 1)) { 310 barray = (MatScalar*)v + i*bs2; 311 } else if((!roworiented) && (m == 1)) { 312 barray = (MatScalar*)v + j*bs2; 313 } else { /* Here a copy is required */ 314 if (roworiented) { 315 value = v + i*(stepval+bs)*bs + j*bs; 316 } else { 317 value = v + j*(stepval+bs)*bs + i*bs; 318 } 319 for (ii=0; ii<bs; ii++,value+=stepval) { 320 for (jj=0; jj<bs; jj++) { 321 *barray++ = *value++; 322 } 323 } 324 barray -=bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (!baij->donotstash) { 368 if (roworiented) { 369 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } else { 371 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 372 } 373 } 374 } 375 } 376 PetscFunctionReturn(0); 377 } 378 379 #define HASH_KEY 0.6180339887 380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 386 { 387 
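  /*
     Hash-table variant of MatSetValues(): each existing bs x bs block is registered in
     MatCreateHashTable_MPIBAIJ_Private() with key (block-row)*Nbs + (block-col) + 1, and
     HD[] stores the address of that block's values. Lookup hashes the key and then probes
     linearly (wrapping to the start of the table) until the key is found; since only
     preexisting blocks appear in the table, a value aimed at a new nonzero location is an
     error here.
  */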
Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 388 PetscTruth roworiented = baij->roworiented; 389 PetscErrorCode ierr; 390 PetscInt i,j,row,col; 391 PetscInt rstart_orig=mat->rmap->rstart; 392 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 393 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 394 PetscReal tmp; 395 MatScalar **HD = baij->hd,value; 396 #if defined(PETSC_USE_DEBUG) 397 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 398 #endif 399 400 PetscFunctionBegin; 401 if (v) PetscValidScalarPointer(v,6); 402 for (i=0; i<m; i++) { 403 #if defined(PETSC_USE_DEBUG) 404 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 405 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 406 #endif 407 row = im[i]; 408 if (row >= rstart_orig && row < rend_orig) { 409 for (j=0; j<n; j++) { 410 col = in[j]; 411 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412 /* Look up PetscInto the Hash Table */ 413 key = (row/bs)*Nbs+(col/bs)+1; 414 h1 = HASH(size,key,tmp); 415 416 417 idx = h1; 418 #if defined(PETSC_USE_DEBUG) 419 insert_ct++; 420 total_ct++; 421 if (HT[idx] != key) { 422 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == size) { 424 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 425 if (idx == h1) { 426 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 427 } 428 } 429 } 430 #else 431 if (HT[idx] != key) { 432 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 433 if (idx == size) { 434 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 435 if (idx == h1) { 436 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 437 } 438 } 439 } 440 #endif 441 /* A HASH table entry is found, so insert the values at the correct address */ 442 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 443 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 444 } 445 } else { 446 if (!baij->donotstash) { 447 if (roworiented) { 448 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 449 } else { 450 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 451 } 452 } 453 } 454 } 455 #if defined(PETSC_USE_DEBUG) 456 baij->ht_total_ct = total_ct; 457 baij->ht_insert_ct = insert_ct; 458 #endif 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 PetscTruth roworiented = baij->roworiented; 468 PetscErrorCode ierr; 469 PetscInt i,j,ii,jj,row,col; 470 PetscInt rstart=baij->rstartbs; 471 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 472 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 473 PetscReal tmp; 474 MatScalar **HD = baij->hd,*baij_a; 475 const PetscScalar *v_t,*value; 476 #if defined(PETSC_USE_DEBUG) 477 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 478 #endif 479 480 PetscFunctionBegin; 481 482 if (roworiented) { 483 stepval = (n-1)*bs; 484 } else { 485 stepval = (m-1)*bs; 486 } 487 for (i=0; i<m; i++) { 488 #if defined(PETSC_USE_DEBUG) 489 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 490 if (im[i] >= baij->Mbs) 
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 491 #endif 492 row = im[i]; 493 v_t = v + i*nbs2; 494 if (row >= rstart && row < rend) { 495 for (j=0; j<n; j++) { 496 col = in[j]; 497 498 /* Look up into the Hash Table */ 499 key = row*Nbs+col+1; 500 h1 = HASH(size,key,tmp); 501 502 idx = h1; 503 #if defined(PETSC_USE_DEBUG) 504 total_ct++; 505 insert_ct++; 506 if (HT[idx] != key) { 507 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == size) { 509 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 510 if (idx == h1) { 511 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 512 } 513 } 514 } 515 #else 516 if (HT[idx] != key) { 517 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 518 if (idx == size) { 519 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 520 if (idx == h1) { 521 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 522 } 523 } 524 } 525 #endif 526 baij_a = HD[idx]; 527 if (roworiented) { 528 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 529 /* value = v + (i*(stepval+bs)+j)*bs; */ 530 value = v_t; 531 v_t += bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval) { 534 for (jj=ii; jj<bs2; jj+=bs) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval) { 540 for (jj=ii; jj<bs2; jj+=bs) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } else { 546 value = v + j*(stepval+bs)*bs + i*bs; 547 if (addv == ADD_VALUES) { 548 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 549 for (jj=0; jj<bs; jj++) { 550 baij_a[jj] += *value++; 551 } 552 } 553 } else { 554 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 555 for (jj=0; jj<bs; jj++) { 556 baij_a[jj] = *value++; 557 } 558 } 559 } 560 } 561 } 562 } else { 563 if (!baij->donotstash) { 564 if (roworiented) { 565 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } else { 567 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 568 } 569 } 570 } 571 } 572 #if defined(PETSC_USE_DEBUG) 573 baij->ht_total_ct = total_ct; 574 baij->ht_insert_ct = insert_ct; 575 #endif 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatGetValues_MPIBAIJ" 581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 582 { 583 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 584 PetscErrorCode ierr; 585 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 586 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 587 588 PetscFunctionBegin; 589 for (i=0; i<m; i++) { 590 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 591 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 592 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 593 row = idxm[i] - bsrstart; 594 for (j=0; j<n; j++) { 595 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 596 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 597 if (idxn[j] >= bscstart && idxn[j] < bscend){ 598 col = idxn[j] - bscstart; 599 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 600 } else { 601 if (!baij->colmap) { 602 ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 603 } 604 #if defined (PETSC_USE_CTABLE) 605 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 606 data --; 607 #else 608 data = baij->colmap[idxn[j]/bs]-1; 609 #endif 610 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 611 else { 612 col = data + idxn[j]%bs; 613 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 614 } 615 } 616 } 617 } else { 618 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatNorm_MPIBAIJ" 626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 627 { 628 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 629 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 630 PetscErrorCode ierr; 631 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 632 PetscReal sum = 0.0; 633 MatScalar *v; 634 635 PetscFunctionBegin; 636 if (baij->size == 1) { 637 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 638 } else { 639 if (type == NORM_FROBENIUS) { 640 v = amat->a; 641 nz = amat->nz*bs2; 642 for (i=0; i<nz; i++) { 643 #if defined(PETSC_USE_COMPLEX) 644 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 645 #else 646 sum += (*v)*(*v); v++; 647 #endif 648 } 649 v = bmat->a; 650 nz = bmat->nz*bs2; 651 for (i=0; i<nz; i++) { 652 #if defined(PETSC_USE_COMPLEX) 653 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 654 #else 655 sum += (*v)*(*v); v++; 656 #endif 657 } 658 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 *nrm = sqrt(*nrm); 660 } else if (type == NORM_1) { /* max column sum */ 661 PetscReal *tmp,*tmp2; 662 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 663 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 664 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 665 v = amat->a; jj = amat->j; 666 for (i=0; i<amat->nz; i++) { 667 for (j=0; j<bs; j++){ 668 col = bs*(cstart + *jj) + j; /* column index */ 669 for (row=0; row<bs; row++){ 670 tmp[col] += PetscAbsScalar(*v); v++; 671 } 672 } 673 jj++; 674 } 675 v = bmat->a; jj = bmat->j; 676 for (i=0; i<bmat->nz; i++) { 677 for (j=0; j<bs; j++){ 678 col = bs*garray[*jj] + j; 679 for (row=0; row<bs; row++){ 680 tmp[col] += PetscAbsScalar(*v); v++; 681 } 682 } 683 jj++; 684 } 685 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 686 *nrm = 0.0; 687 for (j=0; j<mat->cmap->N; j++) { 688 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 689 } 690 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 691 } else if (type == NORM_INFINITY) { /* max row sum */ 692 PetscReal *sums; 693 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 694 sum = 0.0; 695 for (j=0; j<amat->mbs; j++) { 696 for (row=0; row<bs; row++) sums[row] = 0.0; 697 v = amat->a + bs2*amat->i[j]; 698 nz = amat->i[j+1]-amat->i[j]; 699 for (i=0; i<nz; i++) { 700 for (col=0; col<bs; col++){ 701 for (row=0; row<bs; row++){ 702 sums[row] += PetscAbsScalar(*v); v++; 703 } 704 } 705 } 706 v = bmat->a + bs2*bmat->i[j]; 707 nz = bmat->i[j+1]-bmat->i[j]; 708 for (i=0; i<nz; i++) { 709 for (col=0; col<bs; col++){ 710 for (row=0; row<bs; row++){ 711 sums[row] += PetscAbsScalar(*v); v++; 712 } 713 } 714 } 715 for (row=0; row<bs; row++){ 716 if (sums[row] > sum) sum = sums[row]; 717 } 718 } 719 ierr = 
MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 720 ierr = PetscFree(sums);CHKERRQ(ierr); 721 } else { 722 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 } 725 PetscFunctionReturn(0); 726 } 727 728 /* 729 Creates the hash table, and sets the table 730 This table is created only once. 731 If new entried need to be added to the matrix 732 then the hash table has to be destroyed and 733 recreated. 734 */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 737 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 738 { 739 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 740 Mat A = baij->A,B=baij->B; 741 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 742 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 743 PetscErrorCode ierr; 744 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 745 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 746 PetscInt *HT,key; 747 MatScalar **HD; 748 PetscReal tmp; 749 #if defined(PETSC_USE_INFO) 750 PetscInt ct=0,max=0; 751 #endif 752 753 PetscFunctionBegin; 754 if (baij->ht) PetscFunctionReturn(0); 755 756 baij->ht_size = (PetscInt)(factor*nz); 757 ht_size = baij->ht_size; 758 759 /* Allocate Memory for Hash Table */ 760 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 761 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 762 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 763 HD = baij->hd; 764 HT = baij->ht; 765 766 /* Loop Over A */ 767 for (i=0; i<a->mbs; i++) { 768 for (j=ai[i]; j<ai[i+1]; j++) { 769 row = i+rstart; 770 col = aj[j]+cstart; 771 772 key = row*Nbs + col + 1; 773 h1 = HASH(ht_size,key,tmp); 774 for (k=0; k<ht_size; k++){ 775 if (!HT[(h1+k)%ht_size]) { 776 HT[(h1+k)%ht_size] = key; 777 HD[(h1+k)%ht_size] = a->a + j*bs2; 778 break; 779 #if defined(PETSC_USE_INFO) 780 } else { 781 ct++; 782 #endif 783 } 784 } 785 #if defined(PETSC_USE_INFO) 786 if (k> max) max = k; 787 #endif 788 } 789 } 790 /* Loop Over B */ 791 for (i=0; i<b->mbs; i++) { 792 for (j=bi[i]; j<bi[i+1]; j++) { 793 row = i+rstart; 794 col = garray[bj[j]]; 795 key = row*Nbs + col + 1; 796 h1 = HASH(ht_size,key,tmp); 797 for (k=0; k<ht_size; k++){ 798 if (!HT[(h1+k)%ht_size]) { 799 HT[(h1+k)%ht_size] = key; 800 HD[(h1+k)%ht_size] = b->a + j*bs2; 801 break; 802 #if defined(PETSC_USE_INFO) 803 } else { 804 ct++; 805 #endif 806 } 807 } 808 #if defined(PETSC_USE_INFO) 809 if (k> max) max = k; 810 #endif 811 } 812 } 813 814 /* Print Summary */ 815 #if defined(PETSC_USE_INFO) 816 for (i=0,j=0; i<ht_size; i++) { 817 if (HT[i]) {j++;} 818 } 819 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 820 #endif 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 826 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 827 { 828 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 829 PetscErrorCode ierr; 830 PetscInt nstash,reallocs; 831 InsertMode addv; 832 833 PetscFunctionBegin; 834 if (baij->donotstash) { 835 PetscFunctionReturn(0); 836 } 837 838 /* make sure all processors are either in INSERTMODE or ADDMODE */ 839 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 840 if (addv == (ADD_VALUES|INSERT_VALUES)) { 841 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 842 } 843 mat->insertmode = addv; /* in case this processor had no cache */ 844 845 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 846 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 847 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 848 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 849 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 850 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 856 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 857 { 858 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 859 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 860 PetscErrorCode ierr; 861 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 862 PetscInt *row,*col; 863 PetscTruth r1,r2,r3,other_disassembled; 864 MatScalar *val; 865 InsertMode addv = mat->insertmode; 866 PetscMPIInt n; 867 868 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 869 PetscFunctionBegin; 870 if (!baij->donotstash) { 871 while (1) { 872 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 873 if (!flg) break; 874 875 for (i=0; i<n;) { 876 /* Now identify the consecutive vals belonging to the same row */ 877 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 878 if (j < n) ncols = j-i; 879 else ncols = n-i; 880 /* Now assemble all these values with a single function call */ 881 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 882 i = j; 883 } 884 } 885 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 886 /* Now process the block-stash. 
Since the values are stashed column-oriented, 887 set the roworiented flag to column oriented, and after MatSetValues() 888 restore the original flags */ 889 r1 = baij->roworiented; 890 r2 = a->roworiented; 891 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 892 baij->roworiented = PETSC_FALSE; 893 a->roworiented = PETSC_FALSE; 894 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 895 while (1) { 896 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 897 if (!flg) break; 898 899 for (i=0; i<n;) { 900 /* Now identify the consecutive vals belonging to the same row */ 901 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 902 if (j < n) ncols = j-i; 903 else ncols = n-i; 904 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 905 i = j; 906 } 907 } 908 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 909 baij->roworiented = r1; 910 a->roworiented = r2; 911 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 912 } 913 914 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 915 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 916 917 /* determine if any processor has disassembled, if so we must 918 also disassemble ourselfs, in order that we may reassemble. */ 919 /* 920 if nonzero structure of submatrix B cannot change then we know that 921 no processor disassembled thus we can skip this stuff 922 */ 923 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 924 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 925 if (mat->was_assembled && !other_disassembled) { 926 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 927 } 928 } 929 930 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 931 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 932 } 933 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_FALSE; /* b->compressedrow.use */ 934 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 935 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 936 937 #if defined(PETSC_USE_INFO) 938 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 939 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 940 baij->ht_total_ct = 0; 941 baij->ht_insert_ct = 0; 942 } 943 #endif 944 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 945 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 946 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 947 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 948 } 949 950 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 951 baij->rowvalues = 0; 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 957 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 958 { 959 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 960 PetscErrorCode ierr; 961 PetscMPIInt size = baij->size,rank = baij->rank; 962 PetscInt bs = mat->rmap->bs; 963 PetscTruth iascii,isdraw; 964 PetscViewer sviewer; 965 PetscViewerFormat format; 966 967 PetscFunctionBegin; 968 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 969 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 970 if (iascii) { 971 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 972 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 973 MatInfo info; 974 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 975 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 977 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 978 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 979 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 980 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 981 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 982 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 983 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 984 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } else if (format == PETSC_VIEWER_ASCII_INFO) { 987 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 990 PetscFunctionReturn(0); 991 } 992 } 993 994 if (isdraw) { 995 PetscDraw draw; 996 PetscTruth isnull; 997 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 998 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 999 } 1000 1001 if (size == 1) { 1002 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1003 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1004 } else { 1005 /* assemble the entire matrix onto first processor. */ 1006 Mat A; 1007 Mat_SeqBAIJ *Aloc; 1008 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1009 MatScalar *a; 1010 1011 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1012 /* Perhaps this should be the type of mat? 
*/ 1013 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1014 if (!rank) { 1015 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1016 } else { 1017 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1018 } 1019 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1020 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1021 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1022 1023 /* copy over the A part */ 1024 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1025 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1026 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1027 1028 for (i=0; i<mbs; i++) { 1029 rvals[0] = bs*(baij->rstartbs + i); 1030 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1031 for (j=ai[i]; j<ai[i+1]; j++) { 1032 col = (baij->cstartbs+aj[j])*bs; 1033 for (k=0; k<bs; k++) { 1034 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1035 col++; a += bs; 1036 } 1037 } 1038 } 1039 /* copy over the B part */ 1040 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1041 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1042 for (i=0; i<mbs; i++) { 1043 rvals[0] = bs*(baij->rstartbs + i); 1044 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1045 for (j=ai[i]; j<ai[i+1]; j++) { 1046 col = baij->garray[aj[j]]*bs; 1047 for (k=0; k<bs; k++) { 1048 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1049 col++; a += bs; 1050 } 1051 } 1052 } 1053 ierr = PetscFree(rvals);CHKERRQ(ierr); 1054 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1055 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1056 /* 1057 Everyone has to call to draw the matrix since the graphics waits are 1058 synchronized across all processors that share the PetscDraw object 1059 */ 1060 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1061 if (!rank) { 1062 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1063 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1064 } 1065 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1066 ierr = MatDestroy(A);CHKERRQ(ierr); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatView_MPIBAIJ" 1073 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1074 { 1075 PetscErrorCode ierr; 1076 PetscTruth iascii,isdraw,issocket,isbinary; 1077 1078 PetscFunctionBegin; 1079 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1080 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1081 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1082 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1083 if (iascii || isdraw || issocket || isbinary) { 1084 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1085 } else { 1086 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1093 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1094 { 1095 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 #if defined(PETSC_USE_LOG) 1100 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1101 #endif 1102 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1103 
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr =
VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1180 /* do nondiagonal part */ 1181 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1182 if (!merged) { 1183 /* send it on its way */ 1184 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1185 /* do local part */ 1186 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1187 /* receive remote parts: note this assumes the values are not actually */ 1188 /* inserted in yy until the next line */ 1189 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1190 } else { 1191 /* do local part */ 1192 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1193 /* send it on its way */ 1194 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1195 /* values actually were received in the Begin() but we need to call this nop */ 1196 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1203 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1204 { 1205 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1206 PetscErrorCode ierr; 1207 1208 PetscFunctionBegin; 1209 /* do nondiagonal part */ 1210 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1211 /* send it on its way */ 1212 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1213 /* do local part */ 1214 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1215 /* receive remote parts: note this assumes the values are not actually */ 1216 /* inserted in yy until the next line, which is true for my implementation*/ 1217 /* but is not perhaps always true. 
*/ 1218 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 /* 1223 This only works correctly for square matrices where the subblock A->A is the 1224 diagonal block 1225 */ 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1228 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1229 { 1230 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1235 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatScale_MPIBAIJ" 1241 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1242 { 1243 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1248 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1254 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1255 { 1256 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1257 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1258 PetscErrorCode ierr; 1259 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1260 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1261 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1262 1263 PetscFunctionBegin; 1264 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1265 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1266 mat->getrowactive = PETSC_TRUE; 1267 1268 if (!mat->rowvalues && (idx || v)) { 1269 /* 1270 allocate enough space to hold information from the longest row. 
1271 */ 1272 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1273 PetscInt max = 1,mbs = mat->mbs,tmp; 1274 for (i=0; i<mbs; i++) { 1275 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1276 if (max < tmp) { max = tmp; } 1277 } 1278 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1279 } 1280 lrow = row - brstart; 1281 1282 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1283 if (!v) {pvA = 0; pvB = 0;} 1284 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1285 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1286 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1287 nztot = nzA + nzB; 1288 1289 cmap = mat->garray; 1290 if (v || idx) { 1291 if (nztot) { 1292 /* Sort by increasing column numbers, assuming A and B already sorted */ 1293 PetscInt imark = -1; 1294 if (v) { 1295 *v = v_p = mat->rowvalues; 1296 for (i=0; i<nzB; i++) { 1297 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1298 else break; 1299 } 1300 imark = i; 1301 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1302 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1303 } 1304 if (idx) { 1305 *idx = idx_p = mat->rowindices; 1306 if (imark > -1) { 1307 for (i=0; i<imark; i++) { 1308 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1309 } 1310 } else { 1311 for (i=0; i<nzB; i++) { 1312 if (cmap[cworkB[i]/bs] < cstart) 1313 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1314 else break; 1315 } 1316 imark = i; 1317 } 1318 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1319 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1320 } 1321 } else { 1322 if (idx) *idx = 0; 1323 if (v) *v = 0; 1324 } 1325 } 1326 *nz = nztot; 1327 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1328 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1334 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1335 { 1336 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1337 1338 PetscFunctionBegin; 1339 if (!baij->getrowactive) { 1340 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1341 } 1342 baij->getrowactive = PETSC_FALSE; 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1348 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1349 { 1350 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1355 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1361 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1362 { 1363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1364 Mat A = a->A,B = a->B; 1365 PetscErrorCode ierr; 1366 PetscReal isend[5],irecv[5]; 1367 1368 PetscFunctionBegin; 1369 info->block_size = (PetscReal)matin->rmap->bs; 1370 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1371 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1372 isend[3] = info->memory; isend[4] = info->mallocs; 1373 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1374 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1375 isend[3] += 
info->memory; isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr =
PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1475 1476 for (i=0; i<mbs; i++) { 1477 rvals[0] = bs*(baij->rstartbs + i); 1478 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1479 for (j=ai[i]; j<ai[i+1]; j++) { 1480 col = (baij->cstartbs+aj[j])*bs; 1481 for (k=0; k<bs; k++) { 1482 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1483 col++; a += bs; 1484 } 1485 } 1486 } 1487 /* copy over the B part */ 1488 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1489 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1490 for (i=0; i<mbs; i++) { 1491 rvals[0] = bs*(baij->rstartbs + i); 1492 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1493 for (j=ai[i]; j<ai[i+1]; j++) { 1494 col = baij->garray[aj[j]]*bs; 1495 for (k=0; k<bs; k++) { 1496 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1497 col++; a += bs; 1498 } 1499 } 1500 } 1501 ierr = PetscFree(rvals);CHKERRQ(ierr); 1502 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1503 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 1505 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1506 *matout = B; 1507 } else { 1508 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1509 } 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1515 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1516 { 1517 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1518 Mat a = baij->A,b = baij->B; 1519 PetscErrorCode ierr; 1520 PetscInt s1,s2,s3; 1521 1522 PetscFunctionBegin; 1523 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1524 if (rr) { 1525 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1526 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1527 /* Overlap communication with computation. 
*/ 1528 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 } 1530 if (ll) { 1531 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1532 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1533 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1534 } 1535 /* scale the diagonal block */ 1536 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1537 1538 if (rr) { 1539 /* Do a scatter end and then right scale the off-diagonal block */ 1540 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1541 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1542 } 1543 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1549 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1550 { 1551 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1552 PetscErrorCode ierr; 1553 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1554 PetscInt i,*owners = A->rmap->range; 1555 PetscInt *nprocs,j,idx,nsends,row; 1556 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1557 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1558 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1559 MPI_Comm comm = ((PetscObject)A)->comm; 1560 MPI_Request *send_waits,*recv_waits; 1561 MPI_Status recv_status,*send_status; 1562 #if defined(PETSC_DEBUG) 1563 PetscTruth found = PETSC_FALSE; 1564 #endif 1565 1566 PetscFunctionBegin; 1567 /* first count number of contributors to each processor */ 1568 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1569 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1570 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1571 j = 0; 1572 for (i=0; i<N; i++) { 1573 if (lastidx > (idx = rows[i])) j = 0; 1574 lastidx = idx; 1575 for (; j<size; j++) { 1576 if (idx >= owners[j] && idx < owners[j+1]) { 1577 nprocs[2*j]++; 1578 nprocs[2*j+1] = 1; 1579 owner[i] = j; 1580 #if defined(PETSC_DEBUG) 1581 found = PETSC_TRUE; 1582 #endif 1583 break; 1584 } 1585 } 1586 #if defined(PETSC_DEBUG) 1587 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1588 found = PETSC_FALSE; 1589 #endif 1590 } 1591 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1592 1593 /* inform other processors of number of messages and max length*/ 1594 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1595 1596 /* post receives: */ 1597 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1598 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1599 for (i=0; i<nrecvs; i++) { 1600 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1601 } 1602 1603 /* do sends: 1604 1) starts[i] gives the starting index in svalues for stuff going to 1605 the ith processor 1606 */ 1607 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1608 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1609 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1610 starts[0] = 0; 1611 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1612 for (i=0; i<N; i++) { 1613 svalues[starts[owner[i]]++] = rows[i]; 1614 } 1615 1616 starts[0] = 0; 1617 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1618 count = 0; 1619 for 
(i=0; i<size; i++) { 1620 if (nprocs[2*i+1]) { 1621 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1622 } 1623 } 1624 ierr = PetscFree(starts);CHKERRQ(ierr); 1625 1626 base = owners[rank]; 1627 1628 /* wait on receives */ 1629 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1630 count = nrecvs; 1631 slen = 0; 1632 while (count) { 1633 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1634 /* unpack receives into our local space */ 1635 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1636 source[imdex] = recv_status.MPI_SOURCE; 1637 lens[imdex] = n; 1638 slen += n; 1639 count--; 1640 } 1641 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1642 1643 /* move the data into the send scatter */ 1644 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1645 count = 0; 1646 for (i=0; i<nrecvs; i++) { 1647 values = rvalues + i*nmax; 1648 for (j=0; j<lens[i]; j++) { 1649 lrows[count++] = values[j] - base; 1650 } 1651 } 1652 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1653 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1654 ierr = PetscFree(owner);CHKERRQ(ierr); 1655 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1656 1657 /* actually zap the local rows */ 1658 /* 1659 Zero the required rows. If the "diagonal block" of the matrix 1660 is square and the user wishes to set the diagonal we use separate 1661 code so that MatSetValues() is not called for each diagonal allocating 1662 new memory, thus calling lots of mallocs and slowing things down. 1663 1664 */ 1665 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1666 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1667 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1668 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1669 } else if (diag != 0.0) { 1670 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1671 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1672 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1673 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1674 } 1675 for (i=0; i<slen; i++) { 1676 row = lrows[i] + rstart_bs; 1677 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1678 } 1679 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1680 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 } else { 1682 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1683 } 1684 1685 ierr = PetscFree(lrows);CHKERRQ(ierr); 1686 1687 /* wait on sends */ 1688 if (nsends) { 1689 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1690 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1691 ierr = PetscFree(send_status);CHKERRQ(ierr); 1692 } 1693 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1694 ierr = PetscFree(svalues);CHKERRQ(ierr); 1695 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1701 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1702 { 1703 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1704 PetscErrorCode ierr; 1705 1706 PetscFunctionBegin; 1707 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 1711 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatEqual_MPIBAIJ" 1715 PetscErrorCode 
MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1716 { 1717 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1718 Mat a,b,c,d; 1719 PetscTruth flg; 1720 PetscErrorCode ierr; 1721 1722 PetscFunctionBegin; 1723 a = matA->A; b = matA->B; 1724 c = matB->A; d = matB->B; 1725 1726 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1727 if (flg) { 1728 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1729 } 1730 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatCopy_MPIBAIJ" 1736 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1737 { 1738 PetscErrorCode ierr; 1739 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1740 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1741 1742 PetscFunctionBegin; 1743 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1744 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1745 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1746 } else { 1747 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1748 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1749 } 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1755 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1756 { 1757 PetscErrorCode ierr; 1758 1759 PetscFunctionBegin; 1760 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1766 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1767 { 1768 PetscErrorCode ierr; 1769 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1770 PetscBLASInt bnz,one=1; 1771 Mat_SeqBAIJ *x,*y; 1772 1773 PetscFunctionBegin; 1774 if (str == SAME_NONZERO_PATTERN) { 1775 PetscScalar alpha = a; 1776 x = (Mat_SeqBAIJ *)xx->A->data; 1777 y = (Mat_SeqBAIJ *)yy->A->data; 1778 bnz = PetscBLASIntCast(x->nz); 1779 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1780 x = (Mat_SeqBAIJ *)xx->B->data; 1781 y = (Mat_SeqBAIJ *)yy->B->data; 1782 bnz = PetscBLASIntCast(x->nz); 1783 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1784 } else { 1785 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1786 } 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1792 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1793 { 1794 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1795 PetscInt rbs,cbs; 1796 PetscErrorCode ierr; 1797 1798 PetscFunctionBegin; 1799 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1800 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1801 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1802 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1803 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1804 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1810 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1811 { 1812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1813 PetscErrorCode ierr; 1814 1815 PetscFunctionBegin; 1816 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1817 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ 
"MatImaginaryPart_MPIBAIJ" 1823 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1824 { 1825 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1826 PetscErrorCode ierr; 1827 1828 PetscFunctionBegin; 1829 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1830 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1836 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1837 { 1838 PetscErrorCode ierr; 1839 IS iscol_local; 1840 PetscInt csize; 1841 1842 PetscFunctionBegin; 1843 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1844 if (call == MAT_REUSE_MATRIX) { 1845 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1846 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1847 } else { 1848 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1849 } 1850 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1851 if (call == MAT_INITIAL_MATRIX) { 1852 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1853 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1854 } 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1860 /* 1861 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1862 in local and then by concatenating the local matrices the end result. 1863 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1864 */ 1865 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1866 { 1867 PetscErrorCode ierr; 1868 PetscMPIInt rank,size; 1869 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1870 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1871 Mat *local,M,Mreuse; 1872 MatScalar *vwork,*aa; 1873 MPI_Comm comm = ((PetscObject)mat)->comm; 1874 Mat_SeqBAIJ *aij; 1875 1876 1877 PetscFunctionBegin; 1878 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1879 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1880 1881 if (call == MAT_REUSE_MATRIX) { 1882 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1883 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1884 local = &Mreuse; 1885 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1886 } else { 1887 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1888 Mreuse = *local; 1889 ierr = PetscFree(local);CHKERRQ(ierr); 1890 } 1891 1892 /* 1893 m - number of local rows 1894 n - number of columns (same on all processors) 1895 rstart - first row in new global matrix generated 1896 */ 1897 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1898 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1899 m = m/bs; 1900 n = n/bs; 1901 1902 if (call == MAT_INITIAL_MATRIX) { 1903 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1904 ii = aij->i; 1905 jj = aij->j; 1906 1907 /* 1908 Determine the number of non-zeros in the diagonal and off-diagonal 1909 portions of the matrix in order to do correct preallocation 1910 */ 1911 1912 /* first get start and end of "diagonal" columns */ 1913 if (csize == PETSC_DECIDE) { 1914 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1915 if (mglobal == n*bs) { /* square matrix */ 1916 nlocal = m; 1917 } else { 
1918 nlocal = n/size + ((n % size) > rank); 1919 } 1920 } else { 1921 nlocal = csize/bs; 1922 } 1923 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1924 rstart = rend - nlocal; 1925 if (rank == size - 1 && rend != n) { 1926 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1927 } 1928 1929 /* next, compute all the lengths */ 1930 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1931 olens = dlens + m; 1932 for (i=0; i<m; i++) { 1933 jend = ii[i+1] - ii[i]; 1934 olen = 0; 1935 dlen = 0; 1936 for (j=0; j<jend; j++) { 1937 if (*jj < rstart || *jj >= rend) olen++; 1938 else dlen++; 1939 jj++; 1940 } 1941 olens[i] = olen; 1942 dlens[i] = dlen; 1943 } 1944 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1945 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1946 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1947 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1948 ierr = PetscFree(dlens);CHKERRQ(ierr); 1949 } else { 1950 PetscInt ml,nl; 1951 1952 M = *newmat; 1953 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1954 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1955 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1956 /* 1957 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1958 rather than the slower MatSetValues(). 1959 */ 1960 M->was_assembled = PETSC_TRUE; 1961 M->assembled = PETSC_FALSE; 1962 } 1963 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1964 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1965 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1966 ii = aij->i; 1967 jj = aij->j; 1968 aa = aij->a; 1969 for (i=0; i<m; i++) { 1970 row = rstart/bs + i; 1971 nz = ii[i+1] - ii[i]; 1972 cwork = jj; jj += nz; 1973 vwork = aa; aa += nz; 1974 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1975 } 1976 1977 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1978 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1979 *newmat = M; 1980 1981 /* save submatrix used in processor for next request */ 1982 if (call == MAT_INITIAL_MATRIX) { 1983 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1984 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1985 } 1986 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatPermute_MPIBAIJ" 1992 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1993 { 1994 MPI_Comm comm,pcomm; 1995 PetscInt first,local_size,nrows; 1996 const PetscInt *rows; 1997 PetscMPIInt size; 1998 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1999 PetscErrorCode ierr; 2000 2001 PetscFunctionBegin; 2002 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2003 /* make a collective version of 'rowp' */ 2004 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2005 if (pcomm==comm) { 2006 crowp = rowp; 2007 } else { 2008 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2009 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2010 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2011 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2012 } 2013 /* collect the global row permutation and invert it */ 2014 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2015 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2016 if (pcomm!=comm) { 2017 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2018 
} 2019 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2020 /* get the local target indices */ 2021 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2022 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2023 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2024 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2025 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2026 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2027 /* the column permutation is so much easier; 2028 make a local version of 'colp' and invert it */ 2029 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2030 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2031 if (size==1) { 2032 lcolp = colp; 2033 } else { 2034 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2035 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2036 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2037 } 2038 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2039 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2040 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2041 if (size>1) { 2042 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2043 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2044 } 2045 /* now we just get the submatrix */ 2046 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2047 /* clean up */ 2048 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2049 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 #undef __FUNCT__ 2054 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2055 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2056 { 2057 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2058 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2059 2060 PetscFunctionBegin; 2061 if (nghosts) { *nghosts = B->nbs;} 2062 if (ghosts) {*ghosts = baij->garray;} 2063 PetscFunctionReturn(0); 2064 } 2065 2066 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2070 /* 2071 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2072 */ 2073 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2074 { 2075 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2076 PetscErrorCode ierr; 2077 PetscMPIInt size,*ncolsonproc,*disp,nn; 2078 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2079 const PetscInt *is; 2080 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2081 PetscInt *rowhit,M,cstart,cend,colb; 2082 PetscInt *columnsforrow,l; 2083 IS *isa; 2084 PetscTruth done,flg; 2085 ISLocalToGlobalMapping map = mat->bmapping; 2086 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2087 2088 PetscFunctionBegin; 2089 if (!mat->assembled) { 2090 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2091 } 2092 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2093 2094 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2095 2096 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2097 M = mat->rmap->n/bs; 2098 cstart = mat->cmap->rstart/bs; 2099 cend = mat->cmap->rend/bs; 2100 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2101 c->N = mat->cmap->N/bs; 2102 c->m = mat->rmap->n/bs; 2103 c->rstart = mat->rmap->rstart/bs; 2104 2105 c->ncolors = nis; 2106 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2107 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2108 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2109 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2110 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2111 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2112 2113 /* Allow access to data structures of local part of matrix */ 2114 if (!baij->colmap) { 2115 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2116 } 2117 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2118 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2119 2120 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2121 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2122 2123 for (i=0; i<nis; i++) { 2124 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2125 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2126 c->ncolumns[i] = n; 2127 if (n) { 2128 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2129 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2130 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2131 } else { 2132 c->columns[i] = 0; 2133 } 2134 2135 if (ctype == IS_COLORING_GLOBAL){ 2136 /* Determine the total (parallel) number of columns of this color */ 2137 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2138 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2139 2140 nn = PetscMPIIntCast(n); 2141 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2142 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2143 if (!nctot) { 2144 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2145 } 2146 2147 disp[0] = 0; 2148 for (j=1; j<size; j++) { 2149 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2150 } 2151 2152 /* Get complete list of columns for color on each processor */ 2153 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2154 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2155 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2156 } else if (ctype == IS_COLORING_GHOSTED){ 2157 /* Determine local number of columns of this color on this process, including ghost points */ 2158 nctot = n; 2159 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2160 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2161 } else { 2162 SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2163 } 2164 2165 /* 2166 Mark all rows affected by these columns 2167 */ 2168 /* Temporary option to allow for debugging/testing */ 2169 flg = PETSC_FALSE; 2170 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2171 if (!flg) {/*-----------------------------------------------------------------------------*/ 2172 /* crude, fast version */ 2173 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2174 /* loop over columns */ 2175 for (j=0; j<nctot; j++) { 2176 if (ctype == IS_COLORING_GHOSTED) { 2177 col = ltog[cols[j]]; 2178 } else { 2179 col = cols[j]; 2180 } 2181 if (col >= cstart && col < cend) { 2182 /* column is in diagonal block of matrix */ 2183 rows = A_cj + A_ci[col-cstart]; 2184 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2185 } else { 2186 #if defined (PETSC_USE_CTABLE) 2187 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2188 colb --; 2189 #else 2190 colb = baij->colmap[col] - 1; 2191 #endif 2192 if (colb == -1) { 2193 m = 0; 2194 } else { 2195 colb = colb/bs; 2196 rows = B_cj + B_ci[colb]; 2197 m = B_ci[colb+1] - B_ci[colb]; 2198 } 2199 } 2200 /* mark the rows affected by this column in rowhit */ 2201 for (k=0; k<m; k++) { 2202 rowhit[*rows++] = col + 1; 2203 } 2204 } 2205 2206 /* count the number of hits */ 2207 nrows = 0; 2208 for (j=0; j<M; j++) { 2209 if (rowhit[j]) nrows++; 2210 } 2211 c->nrows[i] = nrows; 2212 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2213 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2214 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2215 nrows = 0; 2216 for (j=0; j<M; j++) { 2217 if (rowhit[j]) { 2218 c->rows[i][nrows] = j; 2219 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2220 nrows++; 2221 } 2222 } 2223 } else {/*-------------------------------------------------------------------------------*/ 2224 /* slow version, using rowhit as a linked list */ 2225 PetscInt currentcol,fm,mfm; 2226 rowhit[M] = M; 2227 nrows = 0; 2228 /* loop over columns */ 2229 for (j=0; j<nctot; j++) { 2230 if (ctype == IS_COLORING_GHOSTED) { 2231 col = ltog[cols[j]]; 2232 } else { 2233 col = cols[j]; 2234 } 2235 if (col >= cstart && col < cend) { 2236 /* column is in diagonal block of matrix */ 2237 rows = A_cj + A_ci[col-cstart]; 2238 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2239 } else { 2240 #if defined (PETSC_USE_CTABLE) 2241 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2242 colb --; 2243 #else 2244 colb = baij->colmap[col] - 1; 2245 #endif 2246 if (colb == -1) { 2247 m = 0; 2248 } else { 2249 colb = colb/bs; 2250 rows = B_cj + B_ci[colb]; 2251 m = B_ci[colb+1] - B_ci[colb]; 2252 } 2253 } 2254 2255 /* mark the rows affected by this column in the rowhit linked list */ 2256 fm = M; /* fm points to first entry in linked list */ 2257 for (k=0; k<m; k++) { 2258 currentcol = *rows++; 2259 /* is it already in the list? 
*/ 2260 do { 2261 mfm = fm; 2262 fm = rowhit[fm]; 2263 } while (fm < currentcol); 2264 /* not in list so add it */ 2265 if (fm != currentcol) { 2266 nrows++; 2267 columnsforrow[currentcol] = col; 2268 /* next three lines insert new entry into linked list */ 2269 rowhit[mfm] = currentcol; 2270 rowhit[currentcol] = fm; 2271 fm = currentcol; 2272 /* fm points to present position in list since we know the columns are sorted */ 2273 } else { 2274 SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2275 } 2276 } 2277 } 2278 c->nrows[i] = nrows; 2279 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2280 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2281 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2282 /* now store the linked list of rows into c->rows[i] */ 2283 nrows = 0; 2284 fm = rowhit[M]; 2285 do { 2286 c->rows[i][nrows] = fm; 2287 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2288 fm = rowhit[fm]; 2289 } while (fm < M); 2290 } /* ---------------------------------------------------------------------------------------*/ 2291 ierr = PetscFree(cols);CHKERRQ(ierr); 2292 } 2293 2294 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2295 /* 2296 vscale will contain the "diagonal" on processor scalings followed by the off processor 2297 */ 2298 if (ctype == IS_COLORING_GLOBAL) { 2299 PetscInt *garray; 2300 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2301 for (i=0; i<baij->B->cmap->n/bs; i++) { 2302 for (j=0; j<bs; j++) { 2303 garray[i*bs+j] = bs*baij->garray[i]+j; 2304 } 2305 } 2306 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2307 ierr = PetscFree(garray);CHKERRQ(ierr); 2308 CHKMEMQ; 2309 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2310 for (k=0; k<c->ncolors; k++) { 2311 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2312 for (l=0; l<c->nrows[k]; l++) { 2313 col = c->columnsforrow[k][l]; 2314 if (col >= cstart && col < cend) { 2315 /* column is in diagonal block of matrix */ 2316 colb = col - cstart; 2317 } else { 2318 /* column is in "off-processor" part */ 2319 #if defined (PETSC_USE_CTABLE) 2320 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2321 colb --; 2322 #else 2323 colb = baij->colmap[col] - 1; 2324 #endif 2325 colb = colb/bs; 2326 colb += cend - cstart; 2327 } 2328 c->vscaleforrow[k][l] = colb; 2329 } 2330 } 2331 } else if (ctype == IS_COLORING_GHOSTED) { 2332 /* Get gtol mapping */ 2333 PetscInt N = mat->cmap->N, *gtol; 2334 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2335 for (i=0; i<N; i++) gtol[i] = -1; 2336 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2337 2338 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2339 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2340 for (k=0; k<c->ncolors; k++) { 2341 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2342 for (l=0; l<c->nrows[k]; l++) { 2343 col = c->columnsforrow[k][l]; /* global column index */ 2344 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2345 } 2346 } 2347 ierr = PetscFree(gtol);CHKERRQ(ierr); 2348 } 2349 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2350 2351 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2352 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2353 ierr = 
MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2354 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2355 CHKMEMQ; 2356 PetscFunctionReturn(0); 2357 } 2358 2359 #undef __FUNCT__ 2360 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2361 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2362 { 2363 Mat B; 2364 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2365 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2366 Mat_SeqAIJ *b; 2367 PetscErrorCode ierr; 2368 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2369 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2370 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2371 2372 PetscFunctionBegin; 2373 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2374 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2375 2376 /* ---------------------------------------------------------------- 2377 Tell every processor the number of nonzeros per row 2378 */ 2379 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2380 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2381 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2382 } 2383 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2384 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2385 displs = recvcounts + size; 2386 for (i=0; i<size; i++) { 2387 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2388 displs[i] = A->rmap->range[i]/bs; 2389 } 2390 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2391 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2392 #else 2393 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2394 #endif 2395 /* --------------------------------------------------------------- 2396 Create the sequential matrix of the same type as the local block diagonal 2397 */ 2398 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2399 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2400 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2401 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2402 b = (Mat_SeqAIJ *)B->data; 2403 2404 /*-------------------------------------------------------------------- 2405 Copy my part of matrix column indices over 2406 */ 2407 sendcount = ad->nz + bd->nz; 2408 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2409 a_jsendbuf = ad->j; 2410 b_jsendbuf = bd->j; 2411 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2412 cnt = 0; 2413 for (i=0; i<n; i++) { 2414 2415 /* put in lower diagonal portion */ 2416 m = bd->i[i+1] - bd->i[i]; 2417 while (m > 0) { 2418 /* is it above diagonal (in bd (compressed) numbering) */ 2419 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2420 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2421 m--; 2422 } 2423 2424 /* put in diagonal portion */ 2425 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2426 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2427 } 2428 2429 /* put in upper diagonal portion */ 2430 while (m-- > 0) { 2431 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2432 } 2433 } 2434 if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz 
%D",sendcount,cnt); 2435 2436 /*-------------------------------------------------------------------- 2437 Gather all column indices to all processors 2438 */ 2439 for (i=0; i<size; i++) { 2440 recvcounts[i] = 0; 2441 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2442 recvcounts[i] += lens[j]; 2443 } 2444 } 2445 displs[0] = 0; 2446 for (i=1; i<size; i++) { 2447 displs[i] = displs[i-1] + recvcounts[i-1]; 2448 } 2449 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2450 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2451 #else 2452 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2453 #endif 2454 /*-------------------------------------------------------------------- 2455 Assemble the matrix into useable form (note numerical values not yet set) 2456 */ 2457 /* set the b->ilen (length of each row) values */ 2458 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2459 /* set the b->i indices */ 2460 b->i[0] = 0; 2461 for (i=1; i<=A->rmap->N/bs; i++) { 2462 b->i[i] = b->i[i-1] + lens[i-1]; 2463 } 2464 ierr = PetscFree(lens);CHKERRQ(ierr); 2465 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2466 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2467 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2468 2469 if (A->symmetric){ 2470 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2471 } else if (A->hermitian) { 2472 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2473 } else if (A->structurally_symmetric) { 2474 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2475 } 2476 *newmat = B; 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatSOR_MPIBAIJ" 2482 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2483 { 2484 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2485 PetscErrorCode ierr; 2486 Vec bb1 = 0; 2487 2488 PetscFunctionBegin; 2489 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2490 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2491 } 2492 2493 if (flag == SOR_APPLY_UPPER) { 2494 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2495 PetscFunctionReturn(0); 2496 } 2497 2498 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2499 if (flag & SOR_ZERO_INITIAL_GUESS) { 2500 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2501 its--; 2502 } 2503 2504 while (its--) { 2505 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2506 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 2508 /* update rhs: bb1 = bb - B*x */ 2509 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2510 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2511 2512 /* local sweep */ 2513 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2514 } 2515 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2516 if (flag & SOR_ZERO_INITIAL_GUESS) { 2517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2518 its--; 2519 } 2520 while (its--) { 2521 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2522 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2523 2524 /* update rhs: bb1 = bb - B*x */ 2525 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2526 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2527 2528 /* local sweep */ 2529 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2530 } 2531 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2532 if (flag & SOR_ZERO_INITIAL_GUESS) { 2533 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2534 its--; 2535 } 2536 while (its--) { 2537 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2538 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2539 2540 /* update rhs: bb1 = bb - B*x */ 2541 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2542 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2543 2544 /* local sweep */ 2545 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2546 } 2547 } else { 2548 SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2549 } 2550 2551 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2552 PetscFunctionReturn(0); 2553 } 2554 2555 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2556 2557 2558 /* -------------------------------------------------------------------*/ 2559 static struct _MatOps MatOps_Values = { 2560 MatSetValues_MPIBAIJ, 2561 MatGetRow_MPIBAIJ, 2562 MatRestoreRow_MPIBAIJ, 2563 MatMult_MPIBAIJ, 2564 /* 4*/ MatMultAdd_MPIBAIJ, 2565 MatMultTranspose_MPIBAIJ, 2566 MatMultTransposeAdd_MPIBAIJ, 2567 0, 2568 0, 2569 0, 2570 /*10*/ 0, 2571 0, 2572 0, 2573 MatSOR_MPIBAIJ, 2574 MatTranspose_MPIBAIJ, 2575 /*15*/ MatGetInfo_MPIBAIJ, 2576 MatEqual_MPIBAIJ, 2577 MatGetDiagonal_MPIBAIJ, 2578 MatDiagonalScale_MPIBAIJ, 2579 MatNorm_MPIBAIJ, 2580 /*20*/ MatAssemblyBegin_MPIBAIJ, 2581 MatAssemblyEnd_MPIBAIJ, 2582 MatSetOption_MPIBAIJ, 2583 MatZeroEntries_MPIBAIJ, 2584 /*24*/ MatZeroRows_MPIBAIJ, 2585 0, 2586 0, 2587 0, 2588 0, 2589 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2590 0, 2591 0, 2592 0, 2593 0, 2594 /*34*/ MatDuplicate_MPIBAIJ, 2595 0, 2596 0, 2597 0, 2598 0, 2599 /*39*/ MatAXPY_MPIBAIJ, 2600 MatGetSubMatrices_MPIBAIJ, 2601 MatIncreaseOverlap_MPIBAIJ, 2602 MatGetValues_MPIBAIJ, 2603 MatCopy_MPIBAIJ, 2604 /*44*/ 0, 2605 MatScale_MPIBAIJ, 2606 0, 2607 0, 2608 0, 2609 /*49*/ MatSetBlockSize_MPIBAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*54*/ MatFDColoringCreate_MPIBAIJ, 2615 0, 2616 MatSetUnfactored_MPIBAIJ, 2617 MatPermute_MPIBAIJ, 2618 MatSetValuesBlocked_MPIBAIJ, 2619 /*59*/ MatGetSubMatrix_MPIBAIJ, 2620 MatDestroy_MPIBAIJ, 2621 MatView_MPIBAIJ, 2622 0, 2623 0, 2624 /*64*/ 0, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*74*/ 0, 2635 MatFDColoringApply_BAIJ, 2636 0, 2637 0, 2638 0, 2639 /*79*/ 0, 2640 0, 2641 0, 2642 0, 2643 MatLoad_MPIBAIJ, 2644 /*84*/ 0, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*89*/ 0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*94*/ 0, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*99*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*104*/0, 2665 MatRealPart_MPIBAIJ, 2666 MatImaginaryPart_MPIBAIJ, 2667 0, 2668 0, 2669 /*109*/0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2675 0, 2676 MatGetGhosts_MPIBAIJ 2677 }; 2678 2679 EXTERN_C_BEGIN 2680 #undef __FUNCT__ 2681 #define 
__FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2682 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2683 { 2684 PetscFunctionBegin; 2685 *a = ((Mat_MPIBAIJ *)A->data)->A; 2686 *iscopy = PETSC_FALSE; 2687 PetscFunctionReturn(0); 2688 } 2689 EXTERN_C_END 2690 2691 EXTERN_C_BEGIN 2692 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2693 EXTERN_C_END 2694 2695 EXTERN_C_BEGIN 2696 #undef __FUNCT__ 2697 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2698 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2699 { 2700 PetscInt m,rstart,cstart,cend; 2701 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2702 const PetscInt *JJ=0; 2703 PetscScalar *values=0; 2704 PetscErrorCode ierr; 2705 2706 PetscFunctionBegin; 2707 2708 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2709 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2710 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2711 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2712 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2713 m = B->rmap->n/bs; 2714 rstart = B->rmap->rstart/bs; 2715 cstart = B->cmap->rstart/bs; 2716 cend = B->cmap->rend/bs; 2717 2718 if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2719 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2720 for (i=0; i<m; i++) { 2721 nz = ii[i+1] - ii[i]; 2722 if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2723 nz_max = PetscMax(nz_max,nz); 2724 JJ = jj + ii[i]; 2725 for (j=0; j<nz; j++) { 2726 if (*JJ >= cstart) break; 2727 JJ++; 2728 } 2729 d = 0; 2730 for (; j<nz; j++) { 2731 if (*JJ++ >= cend) break; 2732 d++; 2733 } 2734 d_nnz[i] = d; 2735 o_nnz[i] = nz - d; 2736 } 2737 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2738 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2739 2740 values = (PetscScalar*)V; 2741 if (!values) { 2742 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2743 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2744 } 2745 for (i=0; i<m; i++) { 2746 PetscInt row = i + rstart; 2747 PetscInt ncols = ii[i+1] - ii[i]; 2748 const PetscInt *icols = jj + ii[i]; 2749 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2750 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2751 } 2752 2753 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2754 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 2757 PetscFunctionReturn(0); 2758 } 2759 EXTERN_C_END 2760 2761 #undef __FUNCT__ 2762 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2763 /*@C 2764 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2765 (the default parallel PETSc format). 2766 2767 Collective on MPI_Comm 2768 2769 Input Parameters: 2770 + A - the matrix 2771 . i - the indices into j for the start of each local row (starts with zero) 2772 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2773 - v - optional values in the matrix 2774 2775 Level: developer 2776 2777 .keywords: matrix, aij, compressed row, sparse, parallel 2778 2779 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2780 @*/ 2781 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2782 { 2783 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2784 2785 PetscFunctionBegin; 2786 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2787 if (f) { 2788 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2789 } 2790 PetscFunctionReturn(0); 2791 } 2792 2793 EXTERN_C_BEGIN 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2796 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2797 { 2798 Mat_MPIBAIJ *b; 2799 PetscErrorCode ierr; 2800 PetscInt i, newbs = PetscAbs(bs); 2801 2802 PetscFunctionBegin; 2803 if (bs < 0) { 2804 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2805 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2806 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2807 bs = PetscAbs(bs); 2808 } 2809 if ((d_nnz || o_nnz) && newbs != bs) { 2810 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2811 } 2812 bs = newbs; 2813 2814 2815 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2816 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2817 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2818 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2819 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2820 2821 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2822 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2823 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2824 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2825 2826 if (d_nnz) { 2827 for (i=0; i<B->rmap->n/bs; i++) { 2828 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2829 } 2830 } 2831 if (o_nnz) { 2832 for (i=0; i<B->rmap->n/bs; i++) { 2833 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2834 } 2835 } 2836 2837 b = (Mat_MPIBAIJ*)B->data; 2838 b->bs2 = bs*bs; 2839 b->mbs = B->rmap->n/bs; 2840 b->nbs = B->cmap->n/bs; 2841 b->Mbs = B->rmap->N/bs; 2842 b->Nbs = B->cmap->N/bs; 2843 2844 for (i=0; i<=b->size; i++) { 2845 b->rangebs[i] = B->rmap->range[i]/bs; 2846 } 2847 b->rstartbs = B->rmap->rstart/bs; 2848 b->rendbs = B->rmap->rend/bs; 2849 b->cstartbs = B->cmap->rstart/bs; 2850 b->cendbs = B->cmap->rend/bs; 2851 2852 if (!B->preallocated) { 2853 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2854 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2855 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2856 ierr = 
PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2857 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2858 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2859 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2860 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2861 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2862 } 2863 2864 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2865 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2866 B->preallocated = PETSC_TRUE; 2867 PetscFunctionReturn(0); 2868 } 2869 EXTERN_C_END 2870 2871 EXTERN_C_BEGIN 2872 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2873 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2874 EXTERN_C_END 2875 2876 2877 EXTERN_C_BEGIN 2878 #undef __FUNCT__ 2879 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2880 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2881 { 2882 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2883 PetscErrorCode ierr; 2884 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2885 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2886 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2887 2888 PetscFunctionBegin; 2889 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2890 ii[0] = 0; 2891 CHKMEMQ; 2892 for (i=0; i<M; i++) { 2893 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2894 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2895 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2896 /* remove one from count of matrix has diagonal */ 2897 for (j=id[i]; j<id[i+1]; j++) { 2898 if (jd[j] == i) {ii[i+1]--;break;} 2899 } 2900 CHKMEMQ; 2901 } 2902 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2903 cnt = 0; 2904 for (i=0; i<M; i++) { 2905 for (j=io[i]; j<io[i+1]; j++) { 2906 if (garray[jo[j]] > rstart) break; 2907 jj[cnt++] = garray[jo[j]]; 2908 CHKMEMQ; 2909 } 2910 for (k=id[i]; k<id[i+1]; k++) { 2911 if (jd[k] != i) { 2912 jj[cnt++] = rstart + jd[k]; 2913 CHKMEMQ; 2914 } 2915 } 2916 for (;j<io[i+1]; j++) { 2917 jj[cnt++] = garray[jo[j]]; 2918 CHKMEMQ; 2919 } 2920 } 2921 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2922 PetscFunctionReturn(0); 2923 } 2924 EXTERN_C_END 2925 2926 /*MC 2927 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2928 2929 Options Database Keys: 2930 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2931 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2932 - -mat_use_hash_table <fact> 2933 2934 Level: beginner 2935 2936 .seealso: MatCreateMPIBAIJ 2937 M*/ 2938 2939 EXTERN_C_BEGIN 2940 #undef __FUNCT__ 2941 #define __FUNCT__ "MatCreate_MPIBAIJ" 2942 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2943 { 2944 Mat_MPIBAIJ *b; 2945 PetscErrorCode ierr; 2946 PetscTruth flg; 2947 2948 PetscFunctionBegin; 2949 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2950 B->data = (void*)b; 2951 2952 2953 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2954 B->mapping = 0; 2955 B->assembled = PETSC_FALSE; 2956 2957 B->insertmode = NOT_SET_VALUES; 2958 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2959 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2960 2961 /* build local table of row and column ownerships */ 2962 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2963 2964 /* build cache for off array entries formed */ 2965 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2966 b->donotstash = PETSC_FALSE; 2967 b->colmap = PETSC_NULL; 2968 b->garray = PETSC_NULL; 2969 b->roworiented = PETSC_TRUE; 2970 2971 /* stuff used in block assembly */ 2972 b->barray = 0; 2973 2974 /* stuff used for matrix vector multiply */ 2975 b->lvec = 0; 2976 b->Mvctx = 0; 2977 2978 /* stuff for MatGetRow() */ 2979 b->rowindices = 0; 2980 b->rowvalues = 0; 2981 b->getrowactive = PETSC_FALSE; 2982 2983 /* hash table stuff */ 2984 b->ht = 0; 2985 b->hd = 0; 2986 b->ht_size = 0; 2987 b->ht_flag = PETSC_FALSE; 2988 b->ht_fact = 0; 2989 b->ht_total_ct = 0; 2990 b->ht_insert_ct = 0; 2991 2992 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2993 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2994 if (flg) { 2995 PetscReal fact = 1.39; 2996 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2997 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2998 if (fact <= 1.0) fact = 1.39; 2999 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3000 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3001 } 3002 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3003 3004 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3005 "MatConvert_MPIBAIJ_MPIAdj", 3006 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3007 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3008 "MatStoreValues_MPIBAIJ", 3009 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3010 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3011 "MatRetrieveValues_MPIBAIJ", 3012 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3013 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3014 "MatGetDiagonalBlock_MPIBAIJ", 3015 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3016 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3017 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3018 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3019 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3020 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3021 
MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3022 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3023 "MatDiagonalScaleLocal_MPIBAIJ", 3024 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3025 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3026 "MatSetHashTableFactor_MPIBAIJ", 3027 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3028 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3029 PetscFunctionReturn(0); 3030 } 3031 EXTERN_C_END 3032 3033 /*MC 3034 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3035 3036 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3037 and MATMPIBAIJ otherwise. 3038 3039 Options Database Keys: 3040 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3041 3042 Level: beginner 3043 3044 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3045 M*/ 3046 3047 EXTERN_C_BEGIN 3048 #undef __FUNCT__ 3049 #define __FUNCT__ "MatCreate_BAIJ" 3050 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3051 { 3052 PetscErrorCode ierr; 3053 PetscMPIInt size; 3054 3055 PetscFunctionBegin; 3056 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3057 if (size == 1) { 3058 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3059 } else { 3060 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3061 } 3062 PetscFunctionReturn(0); 3063 } 3064 EXTERN_C_END 3065 3066 #undef __FUNCT__ 3067 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3068 /*@C 3069 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3070 (block compressed row). For good matrix assembly performance 3071 the user should preallocate the matrix storage by setting the parameters 3072 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3073 performance can be increased by more than a factor of 50. 3074 3075 Collective on Mat 3076 3077 Input Parameters: 3078 + A - the matrix 3079 . bs - size of block 3080 . d_nz - number of block nonzeros per block row in diagonal portion of local 3081 submatrix (same for all local rows) 3082 . d_nnz - array containing the number of block nonzeros in the various block rows 3083 in the diagonal portion of the local submatrix (possibly different for each block 3084 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3085 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3086 submatrix (same for all local rows). 3087 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3088 off-diagonal portion of the local submatrix (possibly different for 3089 each block row) or PETSC_NULL. 3090 3091 If the *_nnz parameter is given then the *_nz parameter is ignored. 3092 3093 Options Database Keys: 3094 + -mat_block_size - size of the blocks to use 3095 - -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save memory during assembly 3096 3097 Notes: 3098 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3099 then it must be used on all processors that share the object for that argument. 3100 3101 Storage Information: 3102 For a square global matrix we define each processor's diagonal portion 3103 to be its local rows and the corresponding columns (a square submatrix); 3104 each processor's off-diagonal portion encompasses the remainder of the 3105 local matrix (a rectangular submatrix).
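   As a quick sketch of the calling sequence before the detailed discussion below (the block
   size and per-block-row counts here are illustrative assumptions only, not recommendations):
.vb
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
      /* bs = 2; roughly 3 diagonal blocks and 1 off-diagonal block per block row */
.ve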
3106 3107 The user can specify preallocated storage for the diagonal part of 3108 the local submatrix with either d_nz or d_nnz (not both). Set 3109 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3110 memory allocation. Likewise, specify preallocated storage for the 3111 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3112 3113 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3114 the figure below we depict these three local rows and all columns (0-11). 3115 3116 .vb 3117 0 1 2 3 4 5 6 7 8 9 10 11 3118 ------------------- 3119 row 3 | o o o d d d o o o o o o 3120 row 4 | o o o d d d o o o o o o 3121 row 5 | o o o d d d o o o o o o 3122 ------------------- 3123 .ve 3124 3125 Thus, any entries in the d locations are stored in the d (diagonal) 3126 submatrix, and any entries in the o locations are stored in the 3127 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3128 stored simply in the MATSEQBAIJ format for compressed row storage. 3129 3130 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3131 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3132 In general, for PDE problems in which most nonzeros are near the diagonal, 3133 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3134 or you will get TERRIBLE performance; see the users' manual chapter on 3135 matrices. 3136 3137 You can call MatGetInfo() to get information on how effective the preallocation was, 3138 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3139 You can also run with the option -info and look for messages with the string 3140 malloc in them to see if additional memory allocation was needed. 3141 3142 Level: intermediate 3143 3144 .keywords: matrix, block, aij, compressed row, sparse, parallel 3145 3146 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3147 @*/ 3148 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3149 { 3150 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3151 3152 PetscFunctionBegin; 3153 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3154 if (f) { 3155 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3156 } 3157 PetscFunctionReturn(0); 3158 } 3159 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "MatCreateMPIBAIJ" 3162 /*@C 3163 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3164 (block compressed row). For good matrix assembly performance 3165 the user should preallocate the matrix storage by setting the parameters 3166 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3167 performance can be increased by more than a factor of 50. 3168 3169 Collective on MPI_Comm 3170 3171 Input Parameters: 3172 + comm - MPI communicator 3173 . bs - size of block 3174 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3175 This value should be the same as the local size used in creating the 3176 y vector for the matrix-vector product y = Ax. 3177 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 3178 This value should be the same as the local size used in creating the 3179 x vector for the matrix-vector product y = Ax. 3180 . 

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each
           block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table for faster matrix assembly (size factor <fact>)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
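
/*
   Illustrative sketch (not part of the original source, excluded from the build):
   the convenience interface above collapses the create/set-type/preallocate calls
   into one.  The helper name and all sizes and counts below are placeholder values.
*/
#if 0
static PetscErrorCode ExampleCreateMPIBAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* 2x2 blocks, 50 local block rows/columns, 5 diagonal and 1 off-diagonal nonzero block per block row */
  ierr = MatCreateMPIBAIJ(comm,2,2*50,2*50,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,1,PETSC_NULL,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif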

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  mbs  = Mbs/size + ((Mbs % size) > rank);
  m    = mbs*bs;
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = m;
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in everyone else's part (except the last proc's) and ship it off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the other processors' parts (except the last one's) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
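
/*
   Illustrative sketch (not part of the original source, excluded from the build):
   setting the hash table factor before assembly (cf. the -mat_use_hash_table <fact>
   option mentioned above) and then looking at the diagonal and off-diagonal
   sequential blocks with MatMPIBAIJGetSeqBAIJ().  The helper name and the factor
   value 1.6 are made-up examples.
*/
#if 0
static PetscErrorCode ExampleHashTableAndParts(Mat A)
{
  Mat            Ad,Ao;
  PetscInt       *garray;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
  /* ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() on A ... */
  ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  /* Ad holds the diagonal block, Ao the off-diagonal block; garray maps Ao's
     local block columns back to global block column numbers */
  PetscFunctionReturn(0);
}
#endif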

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is a terrible duplication of code.

  Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat             mat  = *matin;
  PetscInt        m    = *min, n = *nin;
  InsertMode      addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
  if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
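
/*
   Illustrative sketch (not part of the original source, excluded from the build):
   the Fortran shortcut above mirrors what a plain MatSetValuesBlocked() call does
   from C.  Here one bs x bs block is inserted at block row "row" and block column
   "col"; the values array is assumed to hold bs*bs entries for that block.  The
   helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleSetOneBlock(Mat A,PetscInt row,PetscInt col,const PetscScalar block[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesBlocked(A,1,&row,1,&col,block,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif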