1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 19 { 20 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt i,*idxb = 0; 23 PetscScalar *va,*vb; 24 Vec vtmp; 25 26 PetscFunctionBegin; 27 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 28 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 29 if (idx) { 30 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 31 } 32 33 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 34 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 35 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 36 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 37 38 for (i=0; i<A->rmap->n; i++){ 39 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 40 } 41 42 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 43 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 44 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 45 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 46 PetscFunctionReturn(0); 47 } 48 49 EXTERN_C_BEGIN 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 53 { 54 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 59 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 60 PetscFunctionReturn(0); 61 } 62 EXTERN_C_END 63 64 EXTERN_C_BEGIN 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 68 { 69 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 74 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 EXTERN_C_END 78 79 /* 80 Local utility routine that creates a mapping from the global column 81 number to the local number in the off-diagonal part of the local 82 storage of the matrix. This is done in a non scalable way since the 83 length of colmap equals the global matrix length. 
84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined (PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++){ 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define CHUNKSIZE 10 110 111 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 112 { \ 113 \ 114 brow = row/bs; \ 115 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 116 rmax = aimax[brow]; nrow = ailen[brow]; \ 117 bcol = col/bs; \ 118 ridx = row % bs; cidx = col % bs; \ 119 low = 0; high = nrow; \ 120 while (high-low > 3) { \ 121 t = (low+high)/2; \ 122 if (rp[t] > bcol) high = t; \ 123 else low = t; \ 124 } \ 125 for (_i=low; _i<high; _i++) { \ 126 if (rp[_i] > bcol) break; \ 127 if (rp[_i] == bcol) { \ 128 bap = ap + bs2*_i + bs*cidx + ridx; \ 129 if (addv == ADD_VALUES) *bap += value; \ 130 else *bap = value; \ 131 goto a_noinsert; \ 132 } \ 133 } \ 134 if (a->nonew == 1) goto a_noinsert; \ 135 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 136 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 137 N = nrow++ - 1; \ 138 /* shift up all the later entries in this row */ \ 139 for (ii=N; ii>=_i; ii--) { \ 140 rp[ii+1] = rp[ii]; \ 141 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 142 } \ 143 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 144 rp[_i] = bcol; \ 145 ap[bs2*_i + bs*cidx + ridx] = value; \ 146 a_noinsert:; \ 147 ailen[brow] = nrow; \ 148 } 149 150 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 151 { \ 152 brow = row/bs; \ 153 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 154 rmax = bimax[brow]; nrow = bilen[brow]; \ 155 bcol = col/bs; \ 156 ridx = row % bs; cidx = col % bs; \ 157 low = 0; high = nrow; \ 158 while (high-low > 3) { \ 159 t = (low+high)/2; \ 160 if (rp[t] > bcol) high = t; \ 161 else low = t; \ 162 } \ 163 for (_i=low; _i<high; _i++) { \ 164 if (rp[_i] > bcol) break; \ 165 if (rp[_i] == bcol) { \ 166 bap = ap + bs2*_i + bs*cidx + ridx; \ 167 if (addv == ADD_VALUES) *bap += value; \ 168 else *bap = value; \ 169 goto b_noinsert; \ 170 } \ 171 } \ 172 if (b->nonew == 1) goto b_noinsert; \ 173 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 174 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 175 CHKMEMQ;\ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for (ii=N; ii>=_i; ii--) { \ 179 rp[ii+1] = rp[ii]; \ 180 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 181 } \ 182 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 183 rp[_i] = bcol; \ 
  ap[bs2*_i + bs*cidx + ridx] = value; \
  b_noinsert:; \
  bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij =
(Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscTruth roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + i*(stepval+bs)*bs + j*bs; 315 } else { 316 value = v + j*(stepval+bs)*bs + i*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=stepval) { 319 for (jj=0; jj<bs; jj++) { 320 *barray++ = *value++; 321 } 322 } 323 barray -=bs2; 324 } 325 326 if (in[j] >= cstart && in[j] < cend){ 327 col = in[j] - cstart; 328 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 329 } 330 else if (in[j] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 333 #endif 334 else { 335 if (mat->was_assembled) { 336 if (!baij->colmap) { 337 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 338 } 339 340 #if defined(PETSC_USE_DEBUG) 341 #if defined (PETSC_USE_CTABLE) 342 { PetscInt data; 343 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 344 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 345 } 346 #else 347 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 348 #endif 349 #endif 350 #if defined (PETSC_USE_CTABLE) 351 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 352 col = (col - 1)/bs; 353 #else 354 col = (baij->colmap[in[j]] - 1)/bs; 355 #endif 356 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 357 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 358 col = in[j]; 359 } 360 } 361 else col = in[j]; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 } 365 } else { 366 if (!baij->donotstash) { 367 if (roworiented) { 368 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 369 } else { 370 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } 372 } 373 } 374 } 375 PetscFunctionReturn(0); 376 } 377 378 #define HASH_KEY 0.6180339887 379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 385 { 386 
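  /*
     Descriptive note added for clarity (the scheme described here is the one set up by
     MatCreateHashTable_MPIBAIJ_Private() below): each locally owned block (brow,bcol) is
     keyed as key = brow*Nbs + bcol + 1, hashed with the multiplicative golden-ratio HASH
     macro, and stored with linear probing that wraps around the table; HD[idx] then points
     directly at the bs*bs block inside baij->A or baij->B, so an insert or add goes
     straight to the right block without searching the row's column indices.
     Illustrative example (values assumed, not taken from any particular run): with bs = 2
     and Nbs = 100, the scalar entry (row,col) = (7,12) lies in block (3,6), giving
     key = 3*100 + 6 + 1 = 307.
  */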
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs)
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 row = im[i]; 492 v_t = v + i*nbs2; 493 if (row >= rstart && row < rend) { 494 for (j=0; j<n; j++) { 495 col = in[j]; 496 497 /* Look up into the Hash Table */ 498 key = row*Nbs+col+1; 499 h1 = HASH(size,key,tmp); 500 501 idx = h1; 502 #if defined(PETSC_USE_DEBUG) 503 total_ct++; 504 insert_ct++; 505 if (HT[idx] != key) { 506 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 507 if (idx == size) { 508 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == h1) { 510 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 511 } 512 } 513 } 514 #else 515 if (HT[idx] != key) { 516 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 517 if (idx == size) { 518 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 519 if (idx == h1) { 520 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 521 } 522 } 523 } 524 #endif 525 baij_a = HD[idx]; 526 if (roworiented) { 527 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 528 /* value = v + (i*(stepval+bs)+j)*bs; */ 529 value = v_t; 530 v_t += bs; 531 if (addv == ADD_VALUES) { 532 for (ii=0; ii<bs; ii++,value+=stepval) { 533 for (jj=ii; jj<bs2; jj+=bs) { 534 baij_a[jj] += *value++; 535 } 536 } 537 } else { 538 for (ii=0; ii<bs; ii++,value+=stepval) { 539 for (jj=ii; jj<bs2; jj+=bs) { 540 baij_a[jj] = *value++; 541 } 542 } 543 } 544 } else { 545 value = v + j*(stepval+bs)*bs + i*bs; 546 if (addv == ADD_VALUES) { 547 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 548 for (jj=0; jj<bs; jj++) { 549 baij_a[jj] += *value++; 550 } 551 } 552 } else { 553 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 554 for (jj=0; jj<bs; jj++) { 555 baij_a[jj] = *value++; 556 } 557 } 558 } 559 } 560 } 561 } else { 562 if (!baij->donotstash) { 563 if (roworiented) { 564 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 565 } else { 566 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } 568 } 569 } 570 } 571 #if defined(PETSC_USE_DEBUG) 572 baij->ht_total_ct = total_ct; 573 baij->ht_insert_ct = insert_ct; 574 #endif 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatGetValues_MPIBAIJ" 580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 581 { 582 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 583 PetscErrorCode ierr; 584 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 585 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 586 587 PetscFunctionBegin; 588 for (i=0; i<m; i++) { 589 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 590 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 591 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 592 row = idxm[i] - bsrstart; 593 for (j=0; j<n; j++) { 594 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 595 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 596 if (idxn[j] >= bscstart && idxn[j] < bscend){ 597 col = idxn[j] - bscstart; 598 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } else { 600 if (!baij->colmap) { 601 ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 602 } 603 #if defined (PETSC_USE_CTABLE) 604 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 605 data --; 606 #else 607 data = baij->colmap[idxn[j]/bs]-1; 608 #endif 609 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 610 else { 611 col = data + idxn[j]%bs; 612 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 613 } 614 } 615 } 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatNorm_MPIBAIJ" 625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 626 { 627 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 628 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 629 PetscErrorCode ierr; 630 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 631 PetscReal sum = 0.0; 632 MatScalar *v; 633 634 PetscFunctionBegin; 635 if (baij->size == 1) { 636 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 637 } else { 638 if (type == NORM_FROBENIUS) { 639 v = amat->a; 640 nz = amat->nz*bs2; 641 for (i=0; i<nz; i++) { 642 #if defined(PETSC_USE_COMPLEX) 643 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 644 #else 645 sum += (*v)*(*v); v++; 646 #endif 647 } 648 v = bmat->a; 649 nz = bmat->nz*bs2; 650 for (i=0; i<nz; i++) { 651 #if defined(PETSC_USE_COMPLEX) 652 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 653 #else 654 sum += (*v)*(*v); v++; 655 #endif 656 } 657 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 658 *nrm = sqrt(*nrm); 659 } else if (type == NORM_1) { /* max column sum */ 660 PetscReal *tmp,*tmp2; 661 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 662 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 663 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 664 v = amat->a; jj = amat->j; 665 for (i=0; i<amat->nz; i++) { 666 for (j=0; j<bs; j++){ 667 col = bs*(cstart + *jj) + j; /* column index */ 668 for (row=0; row<bs; row++){ 669 tmp[col] += PetscAbsScalar(*v); v++; 670 } 671 } 672 jj++; 673 } 674 v = bmat->a; jj = bmat->j; 675 for (i=0; i<bmat->nz; i++) { 676 for (j=0; j<bs; j++){ 677 col = bs*garray[*jj] + j; 678 for (row=0; row<bs; row++){ 679 tmp[col] += PetscAbsScalar(*v); v++; 680 } 681 } 682 jj++; 683 } 684 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 685 *nrm = 0.0; 686 for (j=0; j<mat->cmap->N; j++) { 687 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 688 } 689 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 690 } else if (type == NORM_INFINITY) { /* max row sum */ 691 PetscReal *sums; 692 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 693 sum = 0.0; 694 for (j=0; j<amat->mbs; j++) { 695 for (row=0; row<bs; row++) sums[row] = 0.0; 696 v = amat->a + bs2*amat->i[j]; 697 nz = amat->i[j+1]-amat->i[j]; 698 for (i=0; i<nz; i++) { 699 for (col=0; col<bs; col++){ 700 for (row=0; row<bs; row++){ 701 sums[row] += PetscAbsScalar(*v); v++; 702 } 703 } 704 } 705 v = bmat->a + bs2*bmat->i[j]; 706 nz = bmat->i[j+1]-bmat->i[j]; 707 for (i=0; i<nz; i++) { 708 for (col=0; col<bs; col++){ 709 for (row=0; row<bs; row++){ 710 sums[row] += PetscAbsScalar(*v); v++; 711 } 712 } 713 } 714 for (row=0; row<bs; row++){ 715 if (sums[row] > sum) sum = sums[row]; 716 } 717 } 718 ierr = 
MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 719 ierr = PetscFree(sums);CHKERRQ(ierr); 720 } else { 721 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 722 } 723 } 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Creates the hash table, and sets the table 729 This table is created only once. 730 If new entried need to be added to the matrix 731 then the hash table has to be destroyed and 732 recreated. 733 */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 737 { 738 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 739 Mat A = baij->A,B=baij->B; 740 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 741 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 742 PetscErrorCode ierr; 743 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 744 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 745 PetscInt *HT,key; 746 MatScalar **HD; 747 PetscReal tmp; 748 #if defined(PETSC_USE_INFO) 749 PetscInt ct=0,max=0; 750 #endif 751 752 PetscFunctionBegin; 753 if (baij->ht) PetscFunctionReturn(0); 754 755 baij->ht_size = (PetscInt)(factor*nz); 756 ht_size = baij->ht_size; 757 758 /* Allocate Memory for Hash Table */ 759 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 760 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 761 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 762 HD = baij->hd; 763 HT = baij->ht; 764 765 /* Loop Over A */ 766 for (i=0; i<a->mbs; i++) { 767 for (j=ai[i]; j<ai[i+1]; j++) { 768 row = i+rstart; 769 col = aj[j]+cstart; 770 771 key = row*Nbs + col + 1; 772 h1 = HASH(ht_size,key,tmp); 773 for (k=0; k<ht_size; k++){ 774 if (!HT[(h1+k)%ht_size]) { 775 HT[(h1+k)%ht_size] = key; 776 HD[(h1+k)%ht_size] = a->a + j*bs2; 777 break; 778 #if defined(PETSC_USE_INFO) 779 } else { 780 ct++; 781 #endif 782 } 783 } 784 #if defined(PETSC_USE_INFO) 785 if (k> max) max = k; 786 #endif 787 } 788 } 789 /* Loop Over B */ 790 for (i=0; i<b->mbs; i++) { 791 for (j=bi[i]; j<bi[i+1]; j++) { 792 row = i+rstart; 793 col = garray[bj[j]]; 794 key = row*Nbs + col + 1; 795 h1 = HASH(ht_size,key,tmp); 796 for (k=0; k<ht_size; k++){ 797 if (!HT[(h1+k)%ht_size]) { 798 HT[(h1+k)%ht_size] = key; 799 HD[(h1+k)%ht_size] = b->a + j*bs2; 800 break; 801 #if defined(PETSC_USE_INFO) 802 } else { 803 ct++; 804 #endif 805 } 806 } 807 #if defined(PETSC_USE_INFO) 808 if (k> max) max = k; 809 #endif 810 } 811 } 812 813 /* Print Summary */ 814 #if defined(PETSC_USE_INFO) 815 for (i=0,j=0; i<ht_size; i++) { 816 if (HT[i]) {j++;} 817 } 818 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 819 #endif 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 826 { 827 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 828 PetscErrorCode ierr; 829 PetscInt nstash,reallocs; 830 InsertMode addv; 831 832 PetscFunctionBegin; 833 if (baij->donotstash) { 834 PetscFunctionReturn(0); 835 } 836 837 /* make sure all processors are either in INSERTMODE or ADDMODE */ 838 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 839 if (addv == (ADD_VALUES|INSERT_VALUES)) { 840 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 841 } 842 mat->insertmode = addv; /* in case this processor had no cache */ 843 844 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 845 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 846 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 847 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 848 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 849 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 #undef __FUNCT__ 854 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 855 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 856 { 857 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 858 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 859 PetscErrorCode ierr; 860 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 861 PetscInt *row,*col; 862 PetscTruth r1,r2,r3,other_disassembled; 863 MatScalar *val; 864 InsertMode addv = mat->insertmode; 865 PetscMPIInt n; 866 867 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 868 PetscFunctionBegin; 869 if (!baij->donotstash) { 870 while (1) { 871 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 872 if (!flg) break; 873 874 for (i=0; i<n;) { 875 /* Now identify the consecutive vals belonging to the same row */ 876 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 877 if (j < n) ncols = j-i; 878 else ncols = n-i; 879 /* Now assemble all these values with a single function call */ 880 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 881 i = j; 882 } 883 } 884 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 885 /* Now process the block-stash. 
Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format ==
PETSC_VIEWER_ASCII_INFO_DETAIL) { 972 MatInfo info; 973 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 974 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 975 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 976 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 977 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 978 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 979 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 980 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 981 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 982 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 983 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 984 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } else if (format == PETSC_VIEWER_ASCII_INFO) { 987 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 990 PetscFunctionReturn(0); 991 } 992 } 993 994 if (isdraw) { 995 PetscDraw draw; 996 PetscTruth isnull; 997 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 998 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 999 } 1000 1001 if (size == 1) { 1002 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1003 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1004 } else { 1005 /* assemble the entire matrix onto first processor. */ 1006 Mat A; 1007 Mat_SeqBAIJ *Aloc; 1008 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1009 MatScalar *a; 1010 1011 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1012 /* Perhaps this should be the type of mat? 
*/ 1013 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1014 if (!rank) { 1015 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1016 } else { 1017 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1018 } 1019 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1020 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1021 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1022 1023 /* copy over the A part */ 1024 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1025 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1026 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1027 1028 for (i=0; i<mbs; i++) { 1029 rvals[0] = bs*(baij->rstartbs + i); 1030 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1031 for (j=ai[i]; j<ai[i+1]; j++) { 1032 col = (baij->cstartbs+aj[j])*bs; 1033 for (k=0; k<bs; k++) { 1034 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1035 col++; a += bs; 1036 } 1037 } 1038 } 1039 /* copy over the B part */ 1040 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1041 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1042 for (i=0; i<mbs; i++) { 1043 rvals[0] = bs*(baij->rstartbs + i); 1044 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1045 for (j=ai[i]; j<ai[i+1]; j++) { 1046 col = baij->garray[aj[j]]*bs; 1047 for (k=0; k<bs; k++) { 1048 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1049 col++; a += bs; 1050 } 1051 } 1052 } 1053 ierr = PetscFree(rvals);CHKERRQ(ierr); 1054 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1055 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1056 /* 1057 Everyone has to call to draw the matrix since the graphics waits are 1058 synchronized across all processors that share the PetscDraw object 1059 */ 1060 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1061 if (!rank) { 1062 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1063 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1064 } 1065 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1066 ierr = MatDestroy(A);CHKERRQ(ierr); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatView_MPIBAIJ" 1073 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1074 { 1075 PetscErrorCode ierr; 1076 PetscTruth iascii,isdraw,issocket,isbinary; 1077 1078 PetscFunctionBegin; 1079 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1080 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1081 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1082 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1083 if (iascii || isdraw || issocket || isbinary) { 1084 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1085 } else { 1086 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1093 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1094 { 1095 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 #if defined(PETSC_USE_LOG) 1100 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1101 #endif 1102 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1103 
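  /* Descriptive note added for clarity: the block stash (mat->bstash) must be destroyed as
     well as the point stash above; everything released below (colmap, garray, lvec, Mvctx,
     the hash table, barray, rangebs) is the parallel bookkeeping that complements the two
     sequential matrices baij->A and baij->B. */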
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr =
VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1180 /* do nondiagonal part */ 1181 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1182 if (!merged) { 1183 /* send it on its way */ 1184 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1185 /* do local part */ 1186 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1187 /* receive remote parts: note this assumes the values are not actually */ 1188 /* inserted in yy until the next line */ 1189 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1190 } else { 1191 /* do local part */ 1192 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1193 /* send it on its way */ 1194 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1195 /* values actually were received in the Begin() but we need to call this nop */ 1196 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1203 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1204 { 1205 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1206 PetscErrorCode ierr; 1207 1208 PetscFunctionBegin; 1209 /* do nondiagonal part */ 1210 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1211 /* send it on its way */ 1212 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1213 /* do local part */ 1214 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1215 /* receive remote parts: note this assumes the values are not actually */ 1216 /* inserted in yy until the next line, which is true for my implementation*/ 1217 /* but is not perhaps always true. 
*/ 1218 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 /* 1223 This only works correctly for square matrices where the subblock A->A is the 1224 diagonal block 1225 */ 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1228 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1229 { 1230 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1235 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatScale_MPIBAIJ" 1241 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1242 { 1243 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1248 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1254 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1255 { 1256 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1257 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1258 PetscErrorCode ierr; 1259 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1260 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1261 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1262 1263 PetscFunctionBegin; 1264 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1265 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1266 mat->getrowactive = PETSC_TRUE; 1267 1268 if (!mat->rowvalues && (idx || v)) { 1269 /* 1270 allocate enough space to hold information from the longest row. 
1271 */ 1272 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1273 PetscInt max = 1,mbs = mat->mbs,tmp; 1274 for (i=0; i<mbs; i++) { 1275 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1276 if (max < tmp) { max = tmp; } 1277 } 1278 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1279 } 1280 lrow = row - brstart; 1281 1282 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1283 if (!v) {pvA = 0; pvB = 0;} 1284 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1285 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1286 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1287 nztot = nzA + nzB; 1288 1289 cmap = mat->garray; 1290 if (v || idx) { 1291 if (nztot) { 1292 /* Sort by increasing column numbers, assuming A and B already sorted */ 1293 PetscInt imark = -1; 1294 if (v) { 1295 *v = v_p = mat->rowvalues; 1296 for (i=0; i<nzB; i++) { 1297 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1298 else break; 1299 } 1300 imark = i; 1301 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1302 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1303 } 1304 if (idx) { 1305 *idx = idx_p = mat->rowindices; 1306 if (imark > -1) { 1307 for (i=0; i<imark; i++) { 1308 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1309 } 1310 } else { 1311 for (i=0; i<nzB; i++) { 1312 if (cmap[cworkB[i]/bs] < cstart) 1313 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1314 else break; 1315 } 1316 imark = i; 1317 } 1318 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1319 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1320 } 1321 } else { 1322 if (idx) *idx = 0; 1323 if (v) *v = 0; 1324 } 1325 } 1326 *nz = nztot; 1327 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1328 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1334 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1335 { 1336 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1337 1338 PetscFunctionBegin; 1339 if (!baij->getrowactive) { 1340 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1341 } 1342 baij->getrowactive = PETSC_FALSE; 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1348 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1349 { 1350 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1355 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1361 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1362 { 1363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1364 Mat A = a->A,B = a->B; 1365 PetscErrorCode ierr; 1366 PetscReal isend[5],irecv[5]; 1367 1368 PetscFunctionBegin; 1369 info->block_size = (PetscReal)matin->rmap->bs; 1370 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1371 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1372 isend[3] = info->memory; isend[4] = info->mallocs; 1373 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1374 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1375 isend[3] += 
info->memory; isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr =
PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1475 1476 for (i=0; i<mbs; i++) { 1477 rvals[0] = bs*(baij->rstartbs + i); 1478 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1479 for (j=ai[i]; j<ai[i+1]; j++) { 1480 col = (baij->cstartbs+aj[j])*bs; 1481 for (k=0; k<bs; k++) { 1482 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1483 col++; a += bs; 1484 } 1485 } 1486 } 1487 /* copy over the B part */ 1488 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1489 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1490 for (i=0; i<mbs; i++) { 1491 rvals[0] = bs*(baij->rstartbs + i); 1492 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1493 for (j=ai[i]; j<ai[i+1]; j++) { 1494 col = baij->garray[aj[j]]*bs; 1495 for (k=0; k<bs; k++) { 1496 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1497 col++; a += bs; 1498 } 1499 } 1500 } 1501 ierr = PetscFree(rvals);CHKERRQ(ierr); 1502 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1503 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 1505 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1506 *matout = B; 1507 } else { 1508 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1509 } 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1515 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1516 { 1517 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1518 Mat a = baij->A,b = baij->B; 1519 PetscErrorCode ierr; 1520 PetscInt s1,s2,s3; 1521 1522 PetscFunctionBegin; 1523 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1524 if (rr) { 1525 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1526 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1527 /* Overlap communication with computation. 
*/ 1528 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 } 1530 if (ll) { 1531 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1532 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1533 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1534 } 1535 /* scale the diagonal block */ 1536 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1537 1538 if (rr) { 1539 /* Do a scatter end and then right scale the off-diagonal block */ 1540 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1541 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1542 } 1543 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1549 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1550 { 1551 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1552 PetscErrorCode ierr; 1553 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1554 PetscInt i,*owners = A->rmap->range; 1555 PetscInt *nprocs,j,idx,nsends,row; 1556 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1557 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1558 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1559 MPI_Comm comm = ((PetscObject)A)->comm; 1560 MPI_Request *send_waits,*recv_waits; 1561 MPI_Status recv_status,*send_status; 1562 #if defined(PETSC_DEBUG) 1563 PetscTruth found = PETSC_FALSE; 1564 #endif 1565 1566 PetscFunctionBegin; 1567 /* first count number of contributors to each processor */ 1568 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1569 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1570 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1571 j = 0; 1572 for (i=0; i<N; i++) { 1573 if (lastidx > (idx = rows[i])) j = 0; 1574 lastidx = idx; 1575 for (; j<size; j++) { 1576 if (idx >= owners[j] && idx < owners[j+1]) { 1577 nprocs[2*j]++; 1578 nprocs[2*j+1] = 1; 1579 owner[i] = j; 1580 #if defined(PETSC_DEBUG) 1581 found = PETSC_TRUE; 1582 #endif 1583 break; 1584 } 1585 } 1586 #if defined(PETSC_DEBUG) 1587 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1588 found = PETSC_FALSE; 1589 #endif 1590 } 1591 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1592 1593 /* inform other processors of number of messages and max length*/ 1594 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1595 1596 /* post receives: */ 1597 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1598 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1599 for (i=0; i<nrecvs; i++) { 1600 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1601 } 1602 1603 /* do sends: 1604 1) starts[i] gives the starting index in svalues for stuff going to 1605 the ith processor 1606 */ 1607 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1608 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1609 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1610 starts[0] = 0; 1611 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1612 for (i=0; i<N; i++) { 1613 svalues[starts[owner[i]]++] = rows[i]; 1614 } 1615 1616 starts[0] = 0; 1617 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1618 count = 0; 1619 for 
(i=0; i<size; i++) { 1620 if (nprocs[2*i+1]) { 1621 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1622 } 1623 } 1624 ierr = PetscFree(starts);CHKERRQ(ierr); 1625 1626 base = owners[rank]; 1627 1628 /* wait on receives */ 1629 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1630 count = nrecvs; 1631 slen = 0; 1632 while (count) { 1633 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1634 /* unpack receives into our local space */ 1635 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1636 source[imdex] = recv_status.MPI_SOURCE; 1637 lens[imdex] = n; 1638 slen += n; 1639 count--; 1640 } 1641 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1642 1643 /* move the data into the send scatter */ 1644 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1645 count = 0; 1646 for (i=0; i<nrecvs; i++) { 1647 values = rvalues + i*nmax; 1648 for (j=0; j<lens[i]; j++) { 1649 lrows[count++] = values[j] - base; 1650 } 1651 } 1652 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1653 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1654 ierr = PetscFree(owner);CHKERRQ(ierr); 1655 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1656 1657 /* actually zap the local rows */ 1658 /* 1659 Zero the required rows. If the "diagonal block" of the matrix 1660 is square and the user wishes to set the diagonal we use separate 1661 code so that MatSetValues() is not called for each diagonal allocating 1662 new memory, thus calling lots of mallocs and slowing things down. 1663 1664 */ 1665 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1666 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1667 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1668 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1669 } else if (diag != 0.0) { 1670 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1671 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1672 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1673 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1674 } 1675 for (i=0; i<slen; i++) { 1676 row = lrows[i] + rstart_bs; 1677 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1678 } 1679 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1680 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 } else { 1682 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1683 } 1684 1685 ierr = PetscFree(lrows);CHKERRQ(ierr); 1686 1687 /* wait on sends */ 1688 if (nsends) { 1689 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1690 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1691 ierr = PetscFree(send_status);CHKERRQ(ierr); 1692 } 1693 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1694 ierr = PetscFree(svalues);CHKERRQ(ierr); 1695 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1701 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1702 { 1703 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1704 PetscErrorCode ierr; 1705 1706 PetscFunctionBegin; 1707 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 1711 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatEqual_MPIBAIJ" 1715 PetscErrorCode 
MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1716 { 1717 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1718 Mat a,b,c,d; 1719 PetscTruth flg; 1720 PetscErrorCode ierr; 1721 1722 PetscFunctionBegin; 1723 a = matA->A; b = matA->B; 1724 c = matB->A; d = matB->B; 1725 1726 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1727 if (flg) { 1728 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1729 } 1730 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatCopy_MPIBAIJ" 1736 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1737 { 1738 PetscErrorCode ierr; 1739 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1740 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1741 1742 PetscFunctionBegin; 1743 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1744 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1745 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1746 } else { 1747 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1748 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1749 } 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1755 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1756 { 1757 PetscErrorCode ierr; 1758 1759 PetscFunctionBegin; 1760 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #include "petscblaslapack.h" 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1767 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1768 { 1769 PetscErrorCode ierr; 1770 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1771 PetscBLASInt bnz,one=1; 1772 Mat_SeqBAIJ *x,*y; 1773 1774 PetscFunctionBegin; 1775 if (str == SAME_NONZERO_PATTERN) { 1776 PetscScalar alpha = a; 1777 x = (Mat_SeqBAIJ *)xx->A->data; 1778 y = (Mat_SeqBAIJ *)yy->A->data; 1779 bnz = PetscBLASIntCast(x->nz); 1780 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1781 x = (Mat_SeqBAIJ *)xx->B->data; 1782 y = (Mat_SeqBAIJ *)yy->B->data; 1783 bnz = PetscBLASIntCast(x->nz); 1784 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1785 } else { 1786 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1787 } 1788 PetscFunctionReturn(0); 1789 } 1790 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1793 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1794 { 1795 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1796 PetscInt rbs,cbs; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1801 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1802 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1803 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1804 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1805 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1811 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1812 { 1813 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1814 PetscErrorCode ierr; 1815 1816 PetscFunctionBegin; 1817 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1818 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #undef 
__FUNCT__ 1823 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1824 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1825 { 1826 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1827 PetscErrorCode ierr; 1828 1829 PetscFunctionBegin; 1830 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1831 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1837 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1838 { 1839 PetscErrorCode ierr; 1840 IS iscol_local; 1841 PetscInt csize; 1842 1843 PetscFunctionBegin; 1844 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1845 if (call == MAT_REUSE_MATRIX) { 1846 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1847 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1848 } else { 1849 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1850 } 1851 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1852 if (call == MAT_INITIAL_MATRIX) { 1853 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1854 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1861 /* 1862 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1863 in local and then by concatenating the local matrices the end result. 1864 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1865 */ 1866 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1867 { 1868 PetscErrorCode ierr; 1869 PetscMPIInt rank,size; 1870 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1871 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1872 Mat *local,M,Mreuse; 1873 MatScalar *vwork,*aa; 1874 MPI_Comm comm = ((PetscObject)mat)->comm; 1875 Mat_SeqBAIJ *aij; 1876 1877 1878 PetscFunctionBegin; 1879 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1880 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1881 1882 if (call == MAT_REUSE_MATRIX) { 1883 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1884 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1885 local = &Mreuse; 1886 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1887 } else { 1888 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1889 Mreuse = *local; 1890 ierr = PetscFree(local);CHKERRQ(ierr); 1891 } 1892 1893 /* 1894 m - number of local rows 1895 n - number of columns (same on all processors) 1896 rstart - first row in new global matrix generated 1897 */ 1898 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1899 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1900 m = m/bs; 1901 n = n/bs; 1902 1903 if (call == MAT_INITIAL_MATRIX) { 1904 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1905 ii = aij->i; 1906 jj = aij->j; 1907 1908 /* 1909 Determine the number of non-zeros in the diagonal and off-diagonal 1910 portions of the matrix in order to do correct preallocation 1911 */ 1912 1913 /* first get start and end of "diagonal" columns */ 1914 if (csize == PETSC_DECIDE) { 1915 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1916 if (mglobal == n*bs) { /* square matrix 
*/ 1917 nlocal = m; 1918 } else { 1919 nlocal = n/size + ((n % size) > rank); 1920 } 1921 } else { 1922 nlocal = csize/bs; 1923 } 1924 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1925 rstart = rend - nlocal; 1926 if (rank == size - 1 && rend != n) { 1927 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1928 } 1929 1930 /* next, compute all the lengths */ 1931 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1932 olens = dlens + m; 1933 for (i=0; i<m; i++) { 1934 jend = ii[i+1] - ii[i]; 1935 olen = 0; 1936 dlen = 0; 1937 for (j=0; j<jend; j++) { 1938 if (*jj < rstart || *jj >= rend) olen++; 1939 else dlen++; 1940 jj++; 1941 } 1942 olens[i] = olen; 1943 dlens[i] = dlen; 1944 } 1945 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1946 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1947 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1948 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1949 ierr = PetscFree(dlens);CHKERRQ(ierr); 1950 } else { 1951 PetscInt ml,nl; 1952 1953 M = *newmat; 1954 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1955 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1956 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1957 /* 1958 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1959 rather than the slower MatSetValues(). 1960 */ 1961 M->was_assembled = PETSC_TRUE; 1962 M->assembled = PETSC_FALSE; 1963 } 1964 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1965 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1966 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1967 ii = aij->i; 1968 jj = aij->j; 1969 aa = aij->a; 1970 for (i=0; i<m; i++) { 1971 row = rstart/bs + i; 1972 nz = ii[i+1] - ii[i]; 1973 cwork = jj; jj += nz; 1974 vwork = aa; aa += nz; 1975 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1976 } 1977 1978 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1979 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1980 *newmat = M; 1981 1982 /* save submatrix used in processor for next request */ 1983 if (call == MAT_INITIAL_MATRIX) { 1984 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1985 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1986 } 1987 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatPermute_MPIBAIJ" 1993 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1994 { 1995 MPI_Comm comm,pcomm; 1996 PetscInt first,local_size,nrows; 1997 const PetscInt *rows; 1998 PetscMPIInt size; 1999 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2000 PetscErrorCode ierr; 2001 2002 PetscFunctionBegin; 2003 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2004 /* make a collective version of 'rowp' */ 2005 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2006 if (pcomm==comm) { 2007 crowp = rowp; 2008 } else { 2009 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2010 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2011 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2012 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2013 } 2014 /* collect the global row permutation and invert it */ 2015 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2016 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2017 if (pcomm!=comm) { 2018 ierr = 
ISDestroy(crowp);CHKERRQ(ierr); 2019 } 2020 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2021 /* get the local target indices */ 2022 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2023 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2024 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2025 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2026 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2027 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2028 /* the column permutation is so much easier; 2029 make a local version of 'colp' and invert it */ 2030 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2031 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2032 if (size==1) { 2033 lcolp = colp; 2034 } else { 2035 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2036 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2037 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2038 } 2039 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2040 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2041 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2042 if (size>1) { 2043 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2044 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2045 } 2046 /* now we just get the submatrix */ 2047 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2048 /* clean up */ 2049 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2050 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2056 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2057 { 2058 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2059 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2060 2061 PetscFunctionBegin; 2062 if (nghosts) { *nghosts = B->nbs;} 2063 if (ghosts) {*ghosts = baij->garray;} 2064 PetscFunctionReturn(0); 2065 } 2066 2067 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2071 /* 2072 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2073 */ 2074 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2075 { 2076 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2077 PetscErrorCode ierr; 2078 PetscMPIInt size,*ncolsonproc,*disp,nn; 2079 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2080 const PetscInt *is; 2081 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2082 PetscInt *rowhit,M,cstart,cend,colb; 2083 PetscInt *columnsforrow,l; 2084 IS *isa; 2085 PetscTruth done,flg; 2086 ISLocalToGlobalMapping map = mat->bmapping; 2087 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2088 2089 PetscFunctionBegin; 2090 if (!mat->assembled) { 2091 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2092 } 2093 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2094 2095 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2096 2097 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2098 M = mat->rmap->n/bs; 2099 cstart = mat->cmap->rstart/bs; 2100 cend = mat->cmap->rend/bs; 2101 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2102 c->N = mat->cmap->N/bs; 2103 c->m = mat->rmap->n/bs; 2104 c->rstart = mat->rmap->rstart/bs; 2105 2106 c->ncolors = nis; 2107 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2108 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2109 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2110 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2111 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2112 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2113 2114 /* Allow access to data structures of local part of matrix */ 2115 if (!baij->colmap) { 2116 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2117 } 2118 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2119 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2120 2121 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2122 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2123 2124 for (i=0; i<nis; i++) { 2125 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2126 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2127 c->ncolumns[i] = n; 2128 if (n) { 2129 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2130 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2131 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2132 } else { 2133 c->columns[i] = 0; 2134 } 2135 2136 if (ctype == IS_COLORING_GLOBAL){ 2137 /* Determine the total (parallel) number of columns of this color */ 2138 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2139 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2140 2141 nn = PetscMPIIntCast(n); 2142 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2143 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2144 if (!nctot) { 2145 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2146 } 2147 2148 disp[0] = 0; 2149 for (j=1; j<size; j++) { 2150 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2151 } 2152 2153 /* Get complete list of columns for color on each processor */ 2154 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2155 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2156 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2157 } else if (ctype == IS_COLORING_GHOSTED){ 2158 /* Determine local number of columns of this color on this process, including ghost points */ 2159 nctot = n; 2160 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2161 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2162 } else { 2163 SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2164 } 2165 2166 /* 2167 Mark all rows affect by these columns 2168 */ 2169 /* Temporary option to allow for debugging/testing */ 2170 flg = PETSC_FALSE; 2171 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2172 if (!flg) {/*-----------------------------------------------------------------------------*/ 2173 /* crude, fast version */ 2174 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2175 /* loop over columns*/ 2176 for (j=0; j<nctot; j++) { 2177 if (ctype == IS_COLORING_GHOSTED) { 2178 col = ltog[cols[j]]; 2179 } else { 2180 col = cols[j]; 2181 } 2182 if (col >= cstart && col < cend) { 2183 /* column is in diagonal block of matrix */ 2184 rows = A_cj + A_ci[col-cstart]; 2185 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2186 } else { 2187 #if defined (PETSC_USE_CTABLE) 2188 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr) 2189 colb --; 2190 #else 2191 colb = baij->colmap[col] - 1; 2192 #endif 2193 if (colb == -1) { 2194 m = 0; 2195 } else { 2196 colb = colb/bs; 2197 rows = B_cj + B_ci[colb]; 2198 m = B_ci[colb+1] - B_ci[colb]; 2199 } 2200 } 2201 /* loop over columns marking them in rowhit */ 2202 for (k=0; k<m; k++) { 2203 rowhit[*rows++] = col + 1; 2204 } 2205 } 2206 2207 /* count the number of hits */ 2208 nrows = 0; 2209 for (j=0; j<M; j++) { 2210 if (rowhit[j]) nrows++; 2211 } 2212 c->nrows[i] = nrows; 2213 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2214 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2215 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2216 nrows = 0; 2217 for (j=0; j<M; j++) { 2218 if (rowhit[j]) { 2219 c->rows[i][nrows] = j; 2220 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2221 nrows++; 2222 } 2223 } 2224 } else {/*-------------------------------------------------------------------------------*/ 2225 /* slow version, using rowhit as a linked list */ 2226 PetscInt currentcol,fm,mfm; 2227 rowhit[M] = M; 2228 nrows = 0; 2229 /* loop over columns*/ 2230 for (j=0; j<nctot; j++) { 2231 if (ctype == IS_COLORING_GHOSTED) { 2232 col = ltog[cols[j]]; 2233 } else { 2234 col = cols[j]; 2235 } 2236 if (col >= cstart && col < cend) { 2237 /* column is in diagonal block of matrix */ 2238 rows = A_cj + A_ci[col-cstart]; 2239 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2240 } else { 2241 #if defined (PETSC_USE_CTABLE) 2242 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2243 colb --; 2244 #else 2245 colb = baij->colmap[col] - 1; 2246 #endif 2247 if (colb == -1) { 2248 m = 0; 2249 } else { 2250 colb = colb/bs; 2251 rows = B_cj + B_ci[colb]; 2252 m = B_ci[colb+1] - B_ci[colb]; 2253 } 2254 } 2255 2256 /* loop over columns marking them in rowhit */ 2257 fm = M; /* fm points to first entry in linked list */ 2258 for (k=0; k<m; k++) { 2259 currentcol = *rows++; 2260 /* is it already in the list? 
*/ 2261 do { 2262 mfm = fm; 2263 fm = rowhit[fm]; 2264 } while (fm < currentcol); 2265 /* not in list so add it */ 2266 if (fm != currentcol) { 2267 nrows++; 2268 columnsforrow[currentcol] = col; 2269 /* next three lines insert new entry into linked list */ 2270 rowhit[mfm] = currentcol; 2271 rowhit[currentcol] = fm; 2272 fm = currentcol; 2273 /* fm points to present position in list since we know the columns are sorted */ 2274 } else { 2275 SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2276 } 2277 } 2278 } 2279 c->nrows[i] = nrows; 2280 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2281 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2282 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2283 /* now store the linked list of rows into c->rows[i] */ 2284 nrows = 0; 2285 fm = rowhit[M]; 2286 do { 2287 c->rows[i][nrows] = fm; 2288 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2289 fm = rowhit[fm]; 2290 } while (fm < M); 2291 } /* ---------------------------------------------------------------------------------------*/ 2292 ierr = PetscFree(cols);CHKERRQ(ierr); 2293 } 2294 2295 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2296 /* 2297 vscale will contain the "diagonal" on-processor scalings followed by the off-processor scalings 2298 */ 2299 if (ctype == IS_COLORING_GLOBAL) { 2300 PetscInt *garray; 2301 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2302 for (i=0; i<baij->B->cmap->n/bs; i++) { 2303 for (j=0; j<bs; j++) { 2304 garray[i*bs+j] = bs*baij->garray[i]+j; 2305 } 2306 } 2307 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2308 ierr = PetscFree(garray);CHKERRQ(ierr); 2309 CHKMEMQ; 2310 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2311 for (k=0; k<c->ncolors; k++) { 2312 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2313 for (l=0; l<c->nrows[k]; l++) { 2314 col = c->columnsforrow[k][l]; 2315 if (col >= cstart && col < cend) { 2316 /* column is in diagonal block of matrix */ 2317 colb = col - cstart; 2318 } else { 2319 /* column is in "off-processor" part */ 2320 #if defined (PETSC_USE_CTABLE) 2321 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2322 colb --; 2323 #else 2324 colb = baij->colmap[col] - 1; 2325 #endif 2326 colb = colb/bs; 2327 colb += cend - cstart; 2328 } 2329 c->vscaleforrow[k][l] = colb; 2330 } 2331 } 2332 } else if (ctype == IS_COLORING_GHOSTED) { 2333 /* Get gtol mapping */ 2334 PetscInt N = mat->cmap->N, *gtol; 2335 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2336 for (i=0; i<N; i++) gtol[i] = -1; 2337 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2338 2339 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2340 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2341 for (k=0; k<c->ncolors; k++) { 2342 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2343 for (l=0; l<c->nrows[k]; l++) { 2344 col = c->columnsforrow[k][l]; /* global column index */ 2345 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2346 } 2347 } 2348 ierr = PetscFree(gtol);CHKERRQ(ierr); 2349 } 2350 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2351 2352 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2353 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2354 ierr = 
MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2355 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2356 CHKMEMQ; 2357 PetscFunctionReturn(0); 2358 } 2359 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2362 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2363 { 2364 Mat B; 2365 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2366 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2367 Mat_SeqAIJ *b; 2368 PetscErrorCode ierr; 2369 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2370 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2371 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2372 2373 PetscFunctionBegin; 2374 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2375 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2376 2377 /* ---------------------------------------------------------------- 2378 Tell every processor the number of nonzeros per row 2379 */ 2380 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2381 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2382 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2383 } 2384 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2385 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2386 displs = recvcounts + size; 2387 for (i=0; i<size; i++) { 2388 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2389 displs[i] = A->rmap->range[i]/bs; 2390 } 2391 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2392 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2393 #else 2394 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2395 #endif 2396 /* --------------------------------------------------------------- 2397 Create the sequential matrix of the same type as the local block diagonal 2398 */ 2399 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2400 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2401 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2402 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2403 b = (Mat_SeqAIJ *)B->data; 2404 2405 /*-------------------------------------------------------------------- 2406 Copy my part of matrix column indices over 2407 */ 2408 sendcount = ad->nz + bd->nz; 2409 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2410 a_jsendbuf = ad->j; 2411 b_jsendbuf = bd->j; 2412 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2413 cnt = 0; 2414 for (i=0; i<n; i++) { 2415 2416 /* put in lower diagonal portion */ 2417 m = bd->i[i+1] - bd->i[i]; 2418 while (m > 0) { 2419 /* is it above diagonal (in bd (compressed) numbering) */ 2420 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2421 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2422 m--; 2423 } 2424 2425 /* put in diagonal portion */ 2426 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2427 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2428 } 2429 2430 /* put in upper diagonal portion */ 2431 while (m-- > 0) { 2432 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2433 } 2434 } 2435 if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz 
%D",sendcount,cnt); 2436 2437 /*-------------------------------------------------------------------- 2438 Gather all column indices to all processors 2439 */ 2440 for (i=0; i<size; i++) { 2441 recvcounts[i] = 0; 2442 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2443 recvcounts[i] += lens[j]; 2444 } 2445 } 2446 displs[0] = 0; 2447 for (i=1; i<size; i++) { 2448 displs[i] = displs[i-1] + recvcounts[i-1]; 2449 } 2450 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2451 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2452 #else 2453 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2454 #endif 2455 /*-------------------------------------------------------------------- 2456 Assemble the matrix into useable form (note numerical values not yet set) 2457 */ 2458 /* set the b->ilen (length of each row) values */ 2459 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2460 /* set the b->i indices */ 2461 b->i[0] = 0; 2462 for (i=1; i<=A->rmap->N/bs; i++) { 2463 b->i[i] = b->i[i-1] + lens[i-1]; 2464 } 2465 ierr = PetscFree(lens);CHKERRQ(ierr); 2466 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2467 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2468 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2469 2470 if (A->symmetric){ 2471 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2472 } else if (A->hermitian) { 2473 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2474 } else if (A->structurally_symmetric) { 2475 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2476 } 2477 *newmat = B; 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatSOR_MPIBAIJ" 2483 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2484 { 2485 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2486 PetscErrorCode ierr; 2487 Vec bb1 = 0; 2488 2489 PetscFunctionBegin; 2490 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2491 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2492 } 2493 2494 if (flag == SOR_APPLY_UPPER) { 2495 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2500 if (flag & SOR_ZERO_INITIAL_GUESS) { 2501 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2502 its--; 2503 } 2504 2505 while (its--) { 2506 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2508 2509 /* update rhs: bb1 = bb - B*x */ 2510 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2511 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2512 2513 /* local sweep */ 2514 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2515 } 2516 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2517 if (flag & SOR_ZERO_INITIAL_GUESS) { 2518 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2519 its--; 2520 } 2521 while (its--) { 2522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2523 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2524 2525 /* update rhs: bb1 = bb - B*x */ 2526 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2527 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2528 2529 /* local sweep */ 2530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2531 } 2532 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2533 if (flag & SOR_ZERO_INITIAL_GUESS) { 2534 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2535 its--; 2536 } 2537 while (its--) { 2538 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2539 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2540 2541 /* update rhs: bb1 = bb - B*x */ 2542 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2543 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2544 2545 /* local sweep */ 2546 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2547 } 2548 } else { 2549 SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2550 } 2551 2552 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2553 PetscFunctionReturn(0); 2554 } 2555 2556 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2557 2558 2559 /* -------------------------------------------------------------------*/ 2560 static struct _MatOps MatOps_Values = { 2561 MatSetValues_MPIBAIJ, 2562 MatGetRow_MPIBAIJ, 2563 MatRestoreRow_MPIBAIJ, 2564 MatMult_MPIBAIJ, 2565 /* 4*/ MatMultAdd_MPIBAIJ, 2566 MatMultTranspose_MPIBAIJ, 2567 MatMultTransposeAdd_MPIBAIJ, 2568 0, 2569 0, 2570 0, 2571 /*10*/ 0, 2572 0, 2573 0, 2574 MatSOR_MPIBAIJ, 2575 MatTranspose_MPIBAIJ, 2576 /*15*/ MatGetInfo_MPIBAIJ, 2577 MatEqual_MPIBAIJ, 2578 MatGetDiagonal_MPIBAIJ, 2579 MatDiagonalScale_MPIBAIJ, 2580 MatNorm_MPIBAIJ, 2581 /*20*/ MatAssemblyBegin_MPIBAIJ, 2582 MatAssemblyEnd_MPIBAIJ, 2583 MatSetOption_MPIBAIJ, 2584 MatZeroEntries_MPIBAIJ, 2585 /*24*/ MatZeroRows_MPIBAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*34*/ MatDuplicate_MPIBAIJ, 2596 0, 2597 0, 2598 0, 2599 0, 2600 /*39*/ MatAXPY_MPIBAIJ, 2601 MatGetSubMatrices_MPIBAIJ, 2602 MatIncreaseOverlap_MPIBAIJ, 2603 MatGetValues_MPIBAIJ, 2604 MatCopy_MPIBAIJ, 2605 /*44*/ 0, 2606 MatScale_MPIBAIJ, 2607 0, 2608 0, 2609 0, 2610 /*49*/ MatSetBlockSize_MPIBAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*54*/ MatFDColoringCreate_MPIBAIJ, 2616 0, 2617 MatSetUnfactored_MPIBAIJ, 2618 MatPermute_MPIBAIJ, 2619 MatSetValuesBlocked_MPIBAIJ, 2620 /*59*/ MatGetSubMatrix_MPIBAIJ, 2621 MatDestroy_MPIBAIJ, 2622 MatView_MPIBAIJ, 2623 0, 2624 0, 2625 /*64*/ 0, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*74*/ 0, 2636 MatFDColoringApply_BAIJ, 2637 0, 2638 0, 2639 0, 2640 /*79*/ 0, 2641 0, 2642 0, 2643 0, 2644 MatLoad_MPIBAIJ, 2645 /*84*/ 0, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*89*/ 0, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*94*/ 0, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*99*/ 0, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*104*/0, 2666 MatRealPart_MPIBAIJ, 2667 MatImaginaryPart_MPIBAIJ, 2668 0, 2669 0, 2670 /*109*/0, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2676 0, 2677 MatGetGhosts_MPIBAIJ 2678 }; 2679 2680 EXTERN_C_BEGIN 2681 #undef __FUNCT__ 2682 #define 
__FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2684 { 2685 PetscFunctionBegin; 2686 *a = ((Mat_MPIBAIJ *)A->data)->A; 2687 *iscopy = PETSC_FALSE; 2688 PetscFunctionReturn(0); 2689 } 2690 EXTERN_C_END 2691 2692 EXTERN_C_BEGIN 2693 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2694 EXTERN_C_END 2695 2696 EXTERN_C_BEGIN 2697 #undef __FUNCT__ 2698 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2699 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2700 { 2701 PetscInt m,rstart,cstart,cend; 2702 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2703 const PetscInt *JJ=0; 2704 PetscScalar *values=0; 2705 PetscErrorCode ierr; 2706 2707 PetscFunctionBegin; 2708 2709 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2710 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2711 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2712 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2713 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2714 m = B->rmap->n/bs; 2715 rstart = B->rmap->rstart/bs; 2716 cstart = B->cmap->rstart/bs; 2717 cend = B->cmap->rend/bs; 2718 2719 if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2720 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2721 for (i=0; i<m; i++) { 2722 nz = ii[i+1] - ii[i]; 2723 if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2724 nz_max = PetscMax(nz_max,nz); 2725 JJ = jj + ii[i]; 2726 for (j=0; j<nz; j++) { 2727 if (*JJ >= cstart) break; 2728 JJ++; 2729 } 2730 d = 0; 2731 for (; j<nz; j++) { 2732 if (*JJ++ >= cend) break; 2733 d++; 2734 } 2735 d_nnz[i] = d; 2736 o_nnz[i] = nz - d; 2737 } 2738 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2739 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2740 2741 values = (PetscScalar*)V; 2742 if (!values) { 2743 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2744 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2745 } 2746 for (i=0; i<m; i++) { 2747 PetscInt row = i + rstart; 2748 PetscInt ncols = ii[i+1] - ii[i]; 2749 const PetscInt *icols = jj + ii[i]; 2750 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2751 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2752 } 2753 2754 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2755 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2757 2758 PetscFunctionReturn(0); 2759 } 2760 EXTERN_C_END 2761 2762 #undef __FUNCT__ 2763 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2764 /*@C 2765 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2766 (the default parallel PETSc format). 2767 2768 Collective on MPI_Comm 2769 2770 Input Parameters: 2771 + A - the matrix 2772 . i - the indices into j for the start of each local row (starts with zero) 2773 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2774 - v - optional values in the matrix 2775 2776 Level: developer 2777 2778 .keywords: matrix, aij, compressed row, sparse, parallel 2779 2780 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2781 @*/ 2782 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2783 { 2784 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2785 2786 PetscFunctionBegin; 2787 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2788 if (f) { 2789 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2790 } 2791 PetscFunctionReturn(0); 2792 } 2793 2794 EXTERN_C_BEGIN 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2797 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2798 { 2799 Mat_MPIBAIJ *b; 2800 PetscErrorCode ierr; 2801 PetscInt i, newbs = PetscAbs(bs); 2802 2803 PetscFunctionBegin; 2804 if (bs < 0) { 2805 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2806 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2807 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2808 bs = PetscAbs(bs); 2809 } 2810 if ((d_nnz || o_nnz) && newbs != bs) { 2811 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2812 } 2813 bs = newbs; 2814 2815 2816 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2817 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2818 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2819 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2820 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2821 2822 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2823 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2824 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2825 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2826 2827 if (d_nnz) { 2828 for (i=0; i<B->rmap->n/bs; i++) { 2829 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2830 } 2831 } 2832 if (o_nnz) { 2833 for (i=0; i<B->rmap->n/bs; i++) { 2834 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2835 } 2836 } 2837 2838 b = (Mat_MPIBAIJ*)B->data; 2839 b->bs2 = bs*bs; 2840 b->mbs = B->rmap->n/bs; 2841 b->nbs = B->cmap->n/bs; 2842 b->Mbs = B->rmap->N/bs; 2843 b->Nbs = B->cmap->N/bs; 2844 2845 for (i=0; i<=b->size; i++) { 2846 b->rangebs[i] = B->rmap->range[i]/bs; 2847 } 2848 b->rstartbs = B->rmap->rstart/bs; 2849 b->rendbs = B->rmap->rend/bs; 2850 b->cstartbs = B->cmap->rstart/bs; 2851 b->cendbs = B->cmap->rend/bs; 2852 2853 if (!B->preallocated) { 2854 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2855 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2856 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2857 ierr = 
PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2858 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2859 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2860 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2862 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2863 } 2864 2865 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2866 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2867 B->preallocated = PETSC_TRUE; 2868 PetscFunctionReturn(0); 2869 } 2870 EXTERN_C_END 2871 2872 EXTERN_C_BEGIN 2873 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2874 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2875 EXTERN_C_END 2876 2877 2878 EXTERN_C_BEGIN 2879 #undef __FUNCT__ 2880 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2881 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2882 { 2883 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2884 PetscErrorCode ierr; 2885 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2886 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2887 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2888 2889 PetscFunctionBegin; 2890 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2891 ii[0] = 0; 2892 CHKMEMQ; 2893 for (i=0; i<M; i++) { 2894 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2895 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2896 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2897 /* remove one from count of matrix has diagonal */ 2898 for (j=id[i]; j<id[i+1]; j++) { 2899 if (jd[j] == i) {ii[i+1]--;break;} 2900 } 2901 CHKMEMQ; 2902 } 2903 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2904 cnt = 0; 2905 for (i=0; i<M; i++) { 2906 for (j=io[i]; j<io[i+1]; j++) { 2907 if (garray[jo[j]] > rstart) break; 2908 jj[cnt++] = garray[jo[j]]; 2909 CHKMEMQ; 2910 } 2911 for (k=id[i]; k<id[i+1]; k++) { 2912 if (jd[k] != i) { 2913 jj[cnt++] = rstart + jd[k]; 2914 CHKMEMQ; 2915 } 2916 } 2917 for (;j<io[i+1]; j++) { 2918 jj[cnt++] = garray[jo[j]]; 2919 CHKMEMQ; 2920 } 2921 } 2922 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 EXTERN_C_END 2926 2927 /*MC 2928 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2929 2930 Options Database Keys: 2931 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2932 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2933 - -mat_use_hash_table <fact> 2934 2935 Level: beginner 2936 2937 .seealso: MatCreateMPIBAIJ 2938 M*/ 2939 2940 EXTERN_C_BEGIN 2941 #undef __FUNCT__ 2942 #define __FUNCT__ "MatCreate_MPIBAIJ" 2943 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2944 { 2945 Mat_MPIBAIJ *b; 2946 PetscErrorCode ierr; 2947 PetscTruth flg; 2948 2949 PetscFunctionBegin; 2950 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2951 B->data = (void*)b; 2952 2953 2954 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2955 B->mapping = 0; 2956 B->assembled = PETSC_FALSE; 2957 2958 B->insertmode = NOT_SET_VALUES; 2959 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2960 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2961 2962 /* build local table of row and column ownerships */ 2963 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2964 2965 /* build cache for off array entries formed */ 2966 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2967 b->donotstash = PETSC_FALSE; 2968 b->colmap = PETSC_NULL; 2969 b->garray = PETSC_NULL; 2970 b->roworiented = PETSC_TRUE; 2971 2972 /* stuff used in block assembly */ 2973 b->barray = 0; 2974 2975 /* stuff used for matrix vector multiply */ 2976 b->lvec = 0; 2977 b->Mvctx = 0; 2978 2979 /* stuff for MatGetRow() */ 2980 b->rowindices = 0; 2981 b->rowvalues = 0; 2982 b->getrowactive = PETSC_FALSE; 2983 2984 /* hash table stuff */ 2985 b->ht = 0; 2986 b->hd = 0; 2987 b->ht_size = 0; 2988 b->ht_flag = PETSC_FALSE; 2989 b->ht_fact = 0; 2990 b->ht_total_ct = 0; 2991 b->ht_insert_ct = 0; 2992 2993 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2994 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2995 if (flg) { 2996 PetscReal fact = 1.39; 2997 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2998 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2999 if (fact <= 1.0) fact = 1.39; 3000 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3001 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3002 } 3003 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3004 3005 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3006 "MatConvert_MPIBAIJ_MPIAdj", 3007 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3008 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3009 "MatStoreValues_MPIBAIJ", 3010 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3011 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3012 "MatRetrieveValues_MPIBAIJ", 3013 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3014 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3015 "MatGetDiagonalBlock_MPIBAIJ", 3016 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3017 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3018 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3019 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3020 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3021 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3022 
MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3023 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3024 "MatDiagonalScaleLocal_MPIBAIJ", 3025 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3026 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3027 "MatSetHashTableFactor_MPIBAIJ", 3028 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3029 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3030 PetscFunctionReturn(0); 3031 } 3032 EXTERN_C_END 3033 3034 /*MC 3035 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3036 3037 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3038 and MATMPIBAIJ otherwise. 3039 3040 Options Database Keys: 3041 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3042 3043 Level: beginner 3044 3045 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3046 M*/ 3047 3048 EXTERN_C_BEGIN 3049 #undef __FUNCT__ 3050 #define __FUNCT__ "MatCreate_BAIJ" 3051 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3052 { 3053 PetscErrorCode ierr; 3054 PetscMPIInt size; 3055 3056 PetscFunctionBegin; 3057 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3058 if (size == 1) { 3059 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3060 } else { 3061 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3062 } 3063 PetscFunctionReturn(0); 3064 } 3065 EXTERN_C_END 3066 3067 #undef __FUNCT__ 3068 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3069 /*@C 3070 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3071 (block compressed row). For good matrix assembly performance 3072 the user should preallocate the matrix storage by setting the parameters 3073 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3074 performance can be increased by more than a factor of 50. 3075 3076 Collective on Mat 3077 3078 Input Parameters: 3079 + A - the matrix 3080 . bs - size of block 3081 . d_nz - number of block nonzeros per block row in diagonal portion of local 3082 submatrix (same for all local rows) 3083 . d_nnz - array containing the number of block nonzeros in the various block rows 3084 in the diagonal portion of the local submatrix (possibly different for each block 3085 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3086 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3087 submatrix (same for all local rows). 3088 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3089 off-diagonal portion of the local submatrix (possibly different for 3090 each block row) or PETSC_NULL. 3091 3092 If the *_nnz parameter is given then the *_nz parameter is ignored 3093 3094 Options Database Keys: 3095 + -mat_block_size - size of the blocks to use 3096 - -mat_use_hash_table <fact> - use a hash table to save memory in constructing the matrix; fact is the hash table factor 3097 3098 Notes: 3099 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3100 then it must be used on all processors that share the object for that argument. 3101 3102 Storage Information: 3103 For a square global matrix we define each processor's diagonal portion 3104 to be its local rows and the corresponding columns (a square submatrix); 3105 each processor's off-diagonal portion encompasses the remainder of the 3106 local matrix (a rectangular submatrix). 
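   As an illustrative sketch only (the communicator comm, the block size bs, and the
   d_nnz/o_nnz values below are hypothetical placeholders, not part of this source), a
   process owning three block rows whose block nonzero pattern is already known might
   preallocate with

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,1,1},o_nnz[3] = {1,1,0};
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3*bs,3*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve

   where d_nnz[i] and o_nnz[i] give the number of nonzero blocks expected in block row i
   of the diagonal and off-diagonal portions, respectively; since the *_nnz arrays are
   given, the *_nz arguments (here 0) are ignored.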
3107 3108 The user can specify preallocated storage for the diagonal part of 3109 the local submatrix with either d_nz or d_nnz (not both). Set 3110 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3111 memory allocation. Likewise, specify preallocated storage for the 3112 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3113 3114 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3115 the figure below we depict these three local rows and all columns (0-11). 3116 3117 .vb 3118 0 1 2 3 4 5 6 7 8 9 10 11 3119 ------------------- 3120 row 3 | o o o d d d o o o o o o 3121 row 4 | o o o d d d o o o o o o 3122 row 5 | o o o d d d o o o o o o 3123 ------------------- 3124 .ve 3125 3126 Thus, any entries in the d locations are stored in the d (diagonal) 3127 submatrix, and any entries in the o locations are stored in the 3128 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3129 stored simply in the MATSEQBAIJ format for compressed row storage. 3130 3131 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3132 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3133 In general, for PDE problems in which most nonzeros are near the diagonal, 3134 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3135 or you will get TERRIBLE performance; see the users' manual chapter on 3136 matrices. 3137 3138 You can call MatGetInfo() to get information on how effective the preallocation was; 3139 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3140 You can also run with the option -info and look for messages with the string 3141 malloc in them to see if additional memory allocation was needed. 3142 3143 Level: intermediate 3144 3145 .keywords: matrix, block, aij, compressed row, sparse, parallel 3146 3147 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3148 @*/ 3149 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3150 { 3151 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3152 3153 PetscFunctionBegin; 3154 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3155 if (f) { 3156 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3157 } 3158 PetscFunctionReturn(0); 3159 } 3160 3161 #undef __FUNCT__ 3162 #define __FUNCT__ "MatCreateMPIBAIJ" 3163 /*@C 3164 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3165 (block compressed row). For good matrix assembly performance 3166 the user should preallocate the matrix storage by setting the parameters 3167 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3168 performance can be increased by more than a factor of 50. 3169 3170 Collective on MPI_Comm 3171 3172 Input Parameters: 3173 + comm - MPI communicator 3174 . bs - size of block 3175 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3176 This value should be the same as the local size used in creating the 3177 y vector for the matrix-vector product y = Ax. 3178 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 3179 This value should be the same as the local size used in creating the 3180 x vector for the matrix-vector product y = Ax. 3181 . 
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If an *_nnz parameter is given then the corresponding *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
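
   As an illustration only (a sketch, not code from this file), a direct creation call might
   look like the following, where the constant estimates of 5 diagonal and 2 off-diagonal
   block nonzeros per block row are placeholders for application-specific values:

.vb
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                           5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve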

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

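  /* duplicate garray, the list of global block-column indices for the off-diagonal (B) part */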
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  mbs  = Mbs/size + ((Mbs % size) > rank);
  m    = mbs*bs;
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = m;
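
  /* turn the gathered per-process block-row counts into cumulative ownership ranges; browners[] holds the corresponding point-row ranges */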
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the parts for everyone else (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
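  /* mask[] flags the block columns already seen in the current block row; masked1[]/masked2[] record which flags were set for the diagonal and off-diagonal parts so they can be cleared afterwards */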
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
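  /* final assembly; each process inserted only rows it owns, so no stashed off-process values should need to be communicated here */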
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat    - the matrix
.  min    - number of input rows
.  im     - input rows
.  nin    - number of input columns
.  in     - input columns
.  v      - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes: This is a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.

   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
  if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
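            /* col < 0 means the block column was not found in colmap; unless new nonzeros are disallowed, disassemble the matrix and fall back to the global block column index */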
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}