#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
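
     Illustrative example (added for exposition, not from the original
   source): with block size bs = 2, if the third off-diagonal block column
   stored locally (garray[2]) corresponds to global block column 7, then

       colmap[7] = 2*bs + 1 = 5

   (shifted by one so that 0 can mean "this global block column does not
   appear in the off-diagonal part"). A later lookup for a global point
   column c recovers the local point column as

       col = (colmap[c/bs] - 1) + c%bs

   which is exactly what MatSetValues_MPIBAIJ() below does.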
84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined (PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++){ 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define CHUNKSIZE 10 110 111 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 112 { \ 113 \ 114 brow = row/bs; \ 115 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 116 rmax = aimax[brow]; nrow = ailen[brow]; \ 117 bcol = col/bs; \ 118 ridx = row % bs; cidx = col % bs; \ 119 low = 0; high = nrow; \ 120 while (high-low > 3) { \ 121 t = (low+high)/2; \ 122 if (rp[t] > bcol) high = t; \ 123 else low = t; \ 124 } \ 125 for (_i=low; _i<high; _i++) { \ 126 if (rp[_i] > bcol) break; \ 127 if (rp[_i] == bcol) { \ 128 bap = ap + bs2*_i + bs*cidx + ridx; \ 129 if (addv == ADD_VALUES) *bap += value; \ 130 else *bap = value; \ 131 goto a_noinsert; \ 132 } \ 133 } \ 134 if (a->nonew == 1) goto a_noinsert; \ 135 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 136 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 137 N = nrow++ - 1; \ 138 /* shift up all the later entries in this row */ \ 139 for (ii=N; ii>=_i; ii--) { \ 140 rp[ii+1] = rp[ii]; \ 141 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 142 } \ 143 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 144 rp[_i] = bcol; \ 145 ap[bs2*_i + bs*cidx + ridx] = value; \ 146 a_noinsert:; \ 147 ailen[brow] = nrow; \ 148 } 149 150 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 151 { \ 152 brow = row/bs; \ 153 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 154 rmax = bimax[brow]; nrow = bilen[brow]; \ 155 bcol = col/bs; \ 156 ridx = row % bs; cidx = col % bs; \ 157 low = 0; high = nrow; \ 158 while (high-low > 3) { \ 159 t = (low+high)/2; \ 160 if (rp[t] > bcol) high = t; \ 161 else low = t; \ 162 } \ 163 for (_i=low; _i<high; _i++) { \ 164 if (rp[_i] > bcol) break; \ 165 if (rp[_i] == bcol) { \ 166 bap = ap + bs2*_i + bs*cidx + ridx; \ 167 if (addv == ADD_VALUES) *bap += value; \ 168 else *bap = value; \ 169 goto b_noinsert; \ 170 } \ 171 } \ 172 if (b->nonew == 1) goto b_noinsert; \ 173 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 174 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 175 CHKMEMQ;\ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for (ii=N; ii>=_i; ii--) { \ 179 rp[ii+1] = rp[ii]; \ 180 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 181 } \ 182 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 183 rp[_i] = bcol; \ 
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij =
(Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscTruth roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + i*(stepval+bs)*bs + j*bs; 315 } else { 316 value = v + j*(stepval+bs)*bs + i*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=stepval) { 319 for (jj=0; jj<bs; jj++) { 320 *barray++ = *value++; 321 } 322 } 323 barray -=bs2; 324 } 325 326 if (in[j] >= cstart && in[j] < cend){ 327 col = in[j] - cstart; 328 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 329 } 330 else if (in[j] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 333 #endif 334 else { 335 if (mat->was_assembled) { 336 if (!baij->colmap) { 337 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 338 } 339 340 #if defined(PETSC_USE_DEBUG) 341 #if defined (PETSC_USE_CTABLE) 342 { PetscInt data; 343 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 344 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 345 } 346 #else 347 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 348 #endif 349 #endif 350 #if defined (PETSC_USE_CTABLE) 351 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 352 col = (col - 1)/bs; 353 #else 354 col = (baij->colmap[in[j]] - 1)/bs; 355 #endif 356 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 357 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 358 col = in[j]; 359 } 360 } 361 else col = in[j]; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 } 365 } else { 366 if (!baij->donotstash) { 367 if (roworiented) { 368 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 369 } else { 370 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } 372 } 373 } 374 } 375 PetscFunctionReturn(0); 376 } 377 378 #define HASH_KEY 0.6180339887 379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 385 { 386 
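  /*
     Illustrative note (added for exposition; not from the original source):
     this routine assumes MatCreateHashTable_MPIBAIJ_Private() has already
     filled baij->ht and baij->hd. Each nonzero block is located through the
     multiplicative hash defined above,

         key = (row/bs)*Nbs + (col/bs) + 1
         h1  = HASH(size,key,tmp) = (PetscInt)(size*frac(key*0.6180339887))

     followed by linear probing idx = h1, h1+1, ... (wrapping to 0) until
     HT[idx] == key. For example, key = 11 with size = 13 gives
     11*0.618034 = 6.798..., fractional part 0.798..., so h1 = 10. HD[idx]
     then points at the bs x bs block stored column-major, and the scalar
     lands at HD[idx][(col%bs)*bs + (row%bs)].
  */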
Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 387 PetscTruth roworiented = baij->roworiented; 388 PetscErrorCode ierr; 389 PetscInt i,j,row,col; 390 PetscInt rstart_orig=mat->rmap->rstart; 391 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 392 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 393 PetscReal tmp; 394 MatScalar **HD = baij->hd,value; 395 #if defined(PETSC_USE_DEBUG) 396 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 397 #endif 398 399 PetscFunctionBegin; 400 if (v) PetscValidScalarPointer(v,6); 401 for (i=0; i<m; i++) { 402 #if defined(PETSC_USE_DEBUG) 403 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 404 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 405 #endif 406 row = im[i]; 407 if (row >= rstart_orig && row < rend_orig) { 408 for (j=0; j<n; j++) { 409 col = in[j]; 410 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 411 /* Look up PetscInto the Hash Table */ 412 key = (row/bs)*Nbs+(col/bs)+1; 413 h1 = HASH(size,key,tmp); 414 415 416 idx = h1; 417 #if defined(PETSC_USE_DEBUG) 418 insert_ct++; 419 total_ct++; 420 if (HT[idx] != key) { 421 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 422 if (idx == size) { 423 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 424 if (idx == h1) { 425 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 426 } 427 } 428 } 429 #else 430 if (HT[idx] != key) { 431 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 432 if (idx == size) { 433 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 434 if (idx == h1) { 435 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 436 } 437 } 438 } 439 #endif 440 /* A HASH table entry is found, so insert the values at the correct address */ 441 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 442 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 443 } 444 } else { 445 if (!baij->donotstash) { 446 if (roworiented) { 447 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 448 } else { 449 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 450 } 451 } 452 } 453 } 454 #if defined(PETSC_USE_DEBUG) 455 baij->ht_total_ct = total_ct; 456 baij->ht_insert_ct = insert_ct; 457 #endif 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 464 { 465 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 466 PetscTruth roworiented = baij->roworiented; 467 PetscErrorCode ierr; 468 PetscInt i,j,ii,jj,row,col; 469 PetscInt rstart=baij->rstartbs; 470 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 471 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 472 PetscReal tmp; 473 MatScalar **HD = baij->hd,*baij_a; 474 const PetscScalar *v_t,*value; 475 #if defined(PETSC_USE_DEBUG) 476 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 477 #endif 478 479 PetscFunctionBegin; 480 481 if (roworiented) { 482 stepval = (n-1)*bs; 483 } else { 484 stepval = (m-1)*bs; 485 } 486 for (i=0; i<m; i++) { 487 #if defined(PETSC_USE_DEBUG) 488 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 489 if (im[i] >= baij->Mbs) 
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 row = im[i]; 492 v_t = v + i*nbs2; 493 if (row >= rstart && row < rend) { 494 for (j=0; j<n; j++) { 495 col = in[j]; 496 497 /* Look up into the Hash Table */ 498 key = row*Nbs+col+1; 499 h1 = HASH(size,key,tmp); 500 501 idx = h1; 502 #if defined(PETSC_USE_DEBUG) 503 total_ct++; 504 insert_ct++; 505 if (HT[idx] != key) { 506 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 507 if (idx == size) { 508 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == h1) { 510 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 511 } 512 } 513 } 514 #else 515 if (HT[idx] != key) { 516 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 517 if (idx == size) { 518 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 519 if (idx == h1) { 520 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 521 } 522 } 523 } 524 #endif 525 baij_a = HD[idx]; 526 if (roworiented) { 527 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 528 /* value = v + (i*(stepval+bs)+j)*bs; */ 529 value = v_t; 530 v_t += bs; 531 if (addv == ADD_VALUES) { 532 for (ii=0; ii<bs; ii++,value+=stepval) { 533 for (jj=ii; jj<bs2; jj+=bs) { 534 baij_a[jj] += *value++; 535 } 536 } 537 } else { 538 for (ii=0; ii<bs; ii++,value+=stepval) { 539 for (jj=ii; jj<bs2; jj+=bs) { 540 baij_a[jj] = *value++; 541 } 542 } 543 } 544 } else { 545 value = v + j*(stepval+bs)*bs + i*bs; 546 if (addv == ADD_VALUES) { 547 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 548 for (jj=0; jj<bs; jj++) { 549 baij_a[jj] += *value++; 550 } 551 } 552 } else { 553 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 554 for (jj=0; jj<bs; jj++) { 555 baij_a[jj] = *value++; 556 } 557 } 558 } 559 } 560 } 561 } else { 562 if (!baij->donotstash) { 563 if (roworiented) { 564 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 565 } else { 566 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } 568 } 569 } 570 } 571 #if defined(PETSC_USE_DEBUG) 572 baij->ht_total_ct = total_ct; 573 baij->ht_insert_ct = insert_ct; 574 #endif 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatGetValues_MPIBAIJ" 580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 581 { 582 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 583 PetscErrorCode ierr; 584 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 585 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 586 587 PetscFunctionBegin; 588 for (i=0; i<m; i++) { 589 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 590 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 591 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 592 row = idxm[i] - bsrstart; 593 for (j=0; j<n; j++) { 594 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 595 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 596 if (idxn[j] >= bscstart && idxn[j] < bscend){ 597 col = idxn[j] - bscstart; 598 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } else { 600 if (!baij->colmap) { 601 ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 602 } 603 #if defined (PETSC_USE_CTABLE) 604 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 605 data --; 606 #else 607 data = baij->colmap[idxn[j]/bs]-1; 608 #endif 609 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 610 else { 611 col = data + idxn[j]%bs; 612 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 613 } 614 } 615 } 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatNorm_MPIBAIJ" 625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 626 { 627 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 628 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 629 PetscErrorCode ierr; 630 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 631 PetscReal sum = 0.0; 632 MatScalar *v; 633 634 PetscFunctionBegin; 635 if (baij->size == 1) { 636 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 637 } else { 638 if (type == NORM_FROBENIUS) { 639 v = amat->a; 640 nz = amat->nz*bs2; 641 for (i=0; i<nz; i++) { 642 #if defined(PETSC_USE_COMPLEX) 643 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 644 #else 645 sum += (*v)*(*v); v++; 646 #endif 647 } 648 v = bmat->a; 649 nz = bmat->nz*bs2; 650 for (i=0; i<nz; i++) { 651 #if defined(PETSC_USE_COMPLEX) 652 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 653 #else 654 sum += (*v)*(*v); v++; 655 #endif 656 } 657 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 658 *nrm = sqrt(*nrm); 659 } else if (type == NORM_1) { /* max column sum */ 660 PetscReal *tmp,*tmp2; 661 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 662 ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 663 tmp2 = tmp + mat->cmap->N; 664 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 665 v = amat->a; jj = amat->j; 666 for (i=0; i<amat->nz; i++) { 667 for (j=0; j<bs; j++){ 668 col = bs*(cstart + *jj) + j; /* column index */ 669 for (row=0; row<bs; row++){ 670 tmp[col] += PetscAbsScalar(*v); v++; 671 } 672 } 673 jj++; 674 } 675 v = bmat->a; jj = bmat->j; 676 for (i=0; i<bmat->nz; i++) { 677 for (j=0; j<bs; j++){ 678 col = bs*garray[*jj] + j; 679 for (row=0; row<bs; row++){ 680 tmp[col] += PetscAbsScalar(*v); v++; 681 } 682 } 683 jj++; 684 } 685 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 686 *nrm = 0.0; 687 for (j=0; j<mat->cmap->N; j++) { 688 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 689 } 690 ierr = PetscFree(tmp);CHKERRQ(ierr); 691 } else if (type == NORM_INFINITY) { /* max row sum */ 692 PetscReal *sums; 693 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 694 sum = 0.0; 695 for (j=0; j<amat->mbs; j++) { 696 for (row=0; row<bs; row++) sums[row] = 0.0; 697 v = amat->a + bs2*amat->i[j]; 698 nz = amat->i[j+1]-amat->i[j]; 699 for (i=0; i<nz; i++) { 700 for (col=0; col<bs; col++){ 701 for (row=0; row<bs; row++){ 702 sums[row] += PetscAbsScalar(*v); v++; 703 } 704 } 705 } 706 v = bmat->a + bs2*bmat->i[j]; 707 nz = bmat->i[j+1]-bmat->i[j]; 708 for (i=0; i<nz; i++) { 709 for (col=0; col<bs; col++){ 710 for (row=0; row<bs; row++){ 711 sums[row] += PetscAbsScalar(*v); v++; 712 } 713 } 714 } 715 for (row=0; row<bs; row++){ 716 if (sums[row] > sum) sum = sums[row]; 717 } 718 } 719 ierr = 
MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 720 ierr = PetscFree(sums);CHKERRQ(ierr); 721 } else { 722 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 } 725 PetscFunctionReturn(0); 726 } 727 728 /* 729 Creates the hash table, and sets the table 730 This table is created only once. 731 If new entried need to be added to the matrix 732 then the hash table has to be destroyed and 733 recreated. 734 */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 737 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 738 { 739 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 740 Mat A = baij->A,B=baij->B; 741 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 742 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 743 PetscErrorCode ierr; 744 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 745 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 746 PetscInt *HT,key; 747 MatScalar **HD; 748 PetscReal tmp; 749 #if defined(PETSC_USE_INFO) 750 PetscInt ct=0,max=0; 751 #endif 752 753 PetscFunctionBegin; 754 baij->ht_size=(PetscInt)(factor*nz); 755 size = baij->ht_size; 756 757 if (baij->ht) { 758 PetscFunctionReturn(0); 759 } 760 761 /* Allocate Memory for Hash Table */ 762 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 763 baij->ht = (PetscInt*)(baij->hd + size); 764 HD = baij->hd; 765 HT = baij->ht; 766 767 768 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 769 770 771 /* Loop Over A */ 772 for (i=0; i<a->mbs; i++) { 773 for (j=ai[i]; j<ai[i+1]; j++) { 774 row = i+rstart; 775 col = aj[j]+cstart; 776 777 key = row*Nbs + col + 1; 778 h1 = HASH(size,key,tmp); 779 for (k=0; k<size; k++){ 780 if (!HT[(h1+k)%size]) { 781 HT[(h1+k)%size] = key; 782 HD[(h1+k)%size] = a->a + j*bs2; 783 break; 784 #if defined(PETSC_USE_INFO) 785 } else { 786 ct++; 787 #endif 788 } 789 } 790 #if defined(PETSC_USE_INFO) 791 if (k> max) max = k; 792 #endif 793 } 794 } 795 /* Loop Over B */ 796 for (i=0; i<b->mbs; i++) { 797 for (j=bi[i]; j<bi[i+1]; j++) { 798 row = i+rstart; 799 col = garray[bj[j]]; 800 key = row*Nbs + col + 1; 801 h1 = HASH(size,key,tmp); 802 for (k=0; k<size; k++){ 803 if (!HT[(h1+k)%size]) { 804 HT[(h1+k)%size] = key; 805 HD[(h1+k)%size] = b->a + j*bs2; 806 break; 807 #if defined(PETSC_USE_INFO) 808 } else { 809 ct++; 810 #endif 811 } 812 } 813 #if defined(PETSC_USE_INFO) 814 if (k> max) max = k; 815 #endif 816 } 817 } 818 819 /* Print Summary */ 820 #if defined(PETSC_USE_INFO) 821 for (i=0,j=0; i<size; i++) { 822 if (HT[i]) {j++;} 823 } 824 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 825 #endif 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 831 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 832 { 833 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 834 PetscErrorCode ierr; 835 PetscInt nstash,reallocs; 836 InsertMode addv; 837 838 PetscFunctionBegin; 839 if (baij->donotstash) { 840 PetscFunctionReturn(0); 841 } 842 843 /* make sure all processors are either in INSERTMODE or ADDMODE */ 844 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 845 if (addv == (ADD_VALUES|INSERT_VALUES)) { 846 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 847 } 848 mat->insertmode = addv; /* in case this processor had no cache */ 849 850 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 851 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 852 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 853 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 854 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 855 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 861 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 862 { 863 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 864 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 865 PetscErrorCode ierr; 866 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 867 PetscInt *row,*col; 868 PetscTruth r1,r2,r3,other_disassembled; 869 MatScalar *val; 870 InsertMode addv = mat->insertmode; 871 PetscMPIInt n; 872 873 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 874 PetscFunctionBegin; 875 if (!baij->donotstash) { 876 while (1) { 877 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 878 if (!flg) break; 879 880 for (i=0; i<n;) { 881 /* Now identify the consecutive vals belonging to the same row */ 882 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 883 if (j < n) ncols = j-i; 884 else ncols = n-i; 885 /* Now assemble all these values with a single function call */ 886 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 887 i = j; 888 } 889 } 890 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 891 /* Now process the block-stash. 
Since the values are stashed column-oriented, 892 set the roworiented flag to column oriented, and after MatSetValues() 893 restore the original flags */ 894 r1 = baij->roworiented; 895 r2 = a->roworiented; 896 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 897 baij->roworiented = PETSC_FALSE; 898 a->roworiented = PETSC_FALSE; 899 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 900 while (1) { 901 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 902 if (!flg) break; 903 904 for (i=0; i<n;) { 905 /* Now identify the consecutive vals belonging to the same row */ 906 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 907 if (j < n) ncols = j-i; 908 else ncols = n-i; 909 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 910 i = j; 911 } 912 } 913 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 914 baij->roworiented = r1; 915 a->roworiented = r2; 916 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 917 } 918 919 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 920 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 921 922 /* determine if any processor has disassembled, if so we must 923 also disassemble ourselfs, in order that we may reassemble. */ 924 /* 925 if nonzero structure of submatrix B cannot change then we know that 926 no processor disassembled thus we can skip this stuff 927 */ 928 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 929 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 930 if (mat->was_assembled && !other_disassembled) { 931 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 932 } 933 } 934 935 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 936 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 937 } 938 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 939 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 940 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 941 942 #if defined(PETSC_USE_INFO) 943 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 944 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 945 baij->ht_total_ct = 0; 946 baij->ht_insert_ct = 0; 947 } 948 #endif 949 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 950 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 951 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 952 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 953 } 954 955 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 956 baij->rowvalues = 0; 957 PetscFunctionReturn(0); 958 } 959 960 #undef __FUNCT__ 961 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 962 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 963 { 964 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 965 PetscErrorCode ierr; 966 PetscMPIInt size = baij->size,rank = baij->rank; 967 PetscInt bs = mat->rmap->bs; 968 PetscTruth iascii,isdraw; 969 PetscViewer sviewer; 970 PetscViewerFormat format; 971 972 PetscFunctionBegin; 973 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 974 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 975 if (iascii) { 976 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 977 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat?
*/ 1019 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1020 if (!rank) { 1021 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1022 } else { 1023 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1024 } 1025 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1026 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1027 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1028 1029 /* copy over the A part */ 1030 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1031 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1032 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1033 1034 for (i=0; i<mbs; i++) { 1035 rvals[0] = bs*(baij->rstartbs + i); 1036 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1037 for (j=ai[i]; j<ai[i+1]; j++) { 1038 col = (baij->cstartbs+aj[j])*bs; 1039 for (k=0; k<bs; k++) { 1040 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1041 col++; a += bs; 1042 } 1043 } 1044 } 1045 /* copy over the B part */ 1046 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1047 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1048 for (i=0; i<mbs; i++) { 1049 rvals[0] = bs*(baij->rstartbs + i); 1050 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1051 for (j=ai[i]; j<ai[i+1]; j++) { 1052 col = baij->garray[aj[j]]*bs; 1053 for (k=0; k<bs; k++) { 1054 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1055 col++; a += bs; 1056 } 1057 } 1058 } 1059 ierr = PetscFree(rvals);CHKERRQ(ierr); 1060 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1061 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1062 /* 1063 Everyone has to call to draw the matrix since the graphics waits are 1064 synchronized across all processors that share the PetscDraw object 1065 */ 1066 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1067 if (!rank) { 1068 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1069 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1070 } 1071 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1072 ierr = MatDestroy(A);CHKERRQ(ierr); 1073 } 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatView_MPIBAIJ" 1079 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1080 { 1081 PetscErrorCode ierr; 1082 PetscTruth iascii,isdraw,issocket,isbinary; 1083 1084 PetscFunctionBegin; 1085 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1086 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1087 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1088 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1089 if (iascii || isdraw || issocket || isbinary) { 1090 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1091 } else { 1092 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1093 } 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1099 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1100 { 1101 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 #if defined(PETSC_USE_LOG) 1106 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1107 #endif 1108 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1109 
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do
nondiagonal part */ 1187 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1188 if (!merged) { 1189 /* send it on its way */ 1190 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1191 /* do local part */ 1192 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1193 /* receive remote parts: note this assumes the values are not actually */ 1194 /* inserted in yy until the next line */ 1195 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1196 } else { 1197 /* do local part */ 1198 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1199 /* send it on its way */ 1200 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1201 /* values actually were received in the Begin() but we need to call this nop */ 1202 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1209 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1210 { 1211 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1212 PetscErrorCode ierr; 1213 1214 PetscFunctionBegin; 1215 /* do nondiagonal part */ 1216 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1217 /* send it on its way */ 1218 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1219 /* do local part */ 1220 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1221 /* receive remote parts: note this assumes the values are not actually */ 1222 /* inserted in yy until the next line, which is true for my implementation*/ 1223 /* but is not perhaps always true. */ 1224 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /* 1229 This only works correctly for square matrices where the subblock A->A is the 1230 diagonal block 1231 */ 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1234 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1235 { 1236 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1241 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatScale_MPIBAIJ" 1247 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1248 { 1249 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1250 PetscErrorCode ierr; 1251 1252 PetscFunctionBegin; 1253 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1254 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1260 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1263 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264 PetscErrorCode ierr; 1265 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1266 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1267 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1268 1269 PetscFunctionBegin; 1270 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271 mat->getrowactive = PETSC_TRUE; 1272 1273 if (!mat->rowvalues && (idx || v)) { 1274 /* 
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  }
else { 1476 B = *matout; 1477 } 1478 1479 /* copy over the A part */ 1480 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1481 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1482 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1483 1484 for (i=0; i<mbs; i++) { 1485 rvals[0] = bs*(baij->rstartbs + i); 1486 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1487 for (j=ai[i]; j<ai[i+1]; j++) { 1488 col = (baij->cstartbs+aj[j])*bs; 1489 for (k=0; k<bs; k++) { 1490 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1491 col++; a += bs; 1492 } 1493 } 1494 } 1495 /* copy over the B part */ 1496 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1497 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1498 for (i=0; i<mbs; i++) { 1499 rvals[0] = bs*(baij->rstartbs + i); 1500 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1501 for (j=ai[i]; j<ai[i+1]; j++) { 1502 col = baij->garray[aj[j]]*bs; 1503 for (k=0; k<bs; k++) { 1504 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1505 col++; a += bs; 1506 } 1507 } 1508 } 1509 ierr = PetscFree(rvals);CHKERRQ(ierr); 1510 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1511 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1512 1513 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1514 *matout = B; 1515 } else { 1516 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } 1520 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1523 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1524 { 1525 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1526 Mat a = baij->A,b = baij->B; 1527 PetscErrorCode ierr; 1528 PetscInt s1,s2,s3; 1529 1530 PetscFunctionBegin; 1531 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1532 if (rr) { 1533 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1534 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1535 /* Overlap communication with computation. 
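       Illustrative note (added for exposition, not from the original
       source): the forward scatter of rr into baij->lvec is started here,
       the purely local work (left-scaling the off-diagonal block by ll and
       scaling the diagonal block by ll and rr) proceeds while the messages
       are in flight, and VecScatterEnd() below completes the scatter just
       before the off-diagonal block is right-scaled by baij->lvec.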
*/ 1536 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1537 } 1538 if (ll) { 1539 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1540 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1541 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1542 } 1543 /* scale the diagonal block */ 1544 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1545 1546 if (rr) { 1547 /* Do a scatter end and then right scale the off-diagonal block */ 1548 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1550 } 1551 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1557 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1558 { 1559 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1560 PetscErrorCode ierr; 1561 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1562 PetscInt i,*owners = A->rmap->range; 1563 PetscInt *nprocs,j,idx,nsends,row; 1564 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1565 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1566 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1567 MPI_Comm comm = ((PetscObject)A)->comm; 1568 MPI_Request *send_waits,*recv_waits; 1569 MPI_Status recv_status,*send_status; 1570 #if defined(PETSC_DEBUG) 1571 PetscTruth found = PETSC_FALSE; 1572 #endif 1573 1574 PetscFunctionBegin; 1575 /* first count number of contributors to each processor */ 1576 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1577 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1578 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1579 j = 0; 1580 for (i=0; i<N; i++) { 1581 if (lastidx > (idx = rows[i])) j = 0; 1582 lastidx = idx; 1583 for (; j<size; j++) { 1584 if (idx >= owners[j] && idx < owners[j+1]) { 1585 nprocs[2*j]++; 1586 nprocs[2*j+1] = 1; 1587 owner[i] = j; 1588 #if defined(PETSC_DEBUG) 1589 found = PETSC_TRUE; 1590 #endif 1591 break; 1592 } 1593 } 1594 #if defined(PETSC_DEBUG) 1595 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1596 found = PETSC_FALSE; 1597 #endif 1598 } 1599 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1600 1601 /* inform other processors of number of messages and max length*/ 1602 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1603 1604 /* post receives: */ 1605 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1606 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1607 for (i=0; i<nrecvs; i++) { 1608 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1609 } 1610 1611 /* do sends: 1612 1) starts[i] gives the starting index in svalues for stuff going to 1613 the ith processor 1614 */ 1615 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1616 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1617 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1618 starts[0] = 0; 1619 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1620 for (i=0; i<N; i++) { 1621 svalues[starts[owner[i]]++] = rows[i]; 1622 } 1623 1624 starts[0] = 0; 1625 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1626 count = 0; 1627 for 
(i=0; i<size; i++) { 1628 if (nprocs[2*i+1]) { 1629 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1630 } 1631 } 1632 ierr = PetscFree(starts);CHKERRQ(ierr); 1633 1634 base = owners[rank]; 1635 1636 /* wait on receives */ 1637 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1638 source = lens + nrecvs; 1639 count = nrecvs; slen = 0; 1640 while (count) { 1641 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1642 /* unpack receives into our local space */ 1643 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1644 source[imdex] = recv_status.MPI_SOURCE; 1645 lens[imdex] = n; 1646 slen += n; 1647 count--; 1648 } 1649 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1650 1651 /* move the data into the send scatter */ 1652 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1653 count = 0; 1654 for (i=0; i<nrecvs; i++) { 1655 values = rvalues + i*nmax; 1656 for (j=0; j<lens[i]; j++) { 1657 lrows[count++] = values[j] - base; 1658 } 1659 } 1660 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1661 ierr = PetscFree(lens);CHKERRQ(ierr); 1662 ierr = PetscFree(owner);CHKERRQ(ierr); 1663 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1664 1665 /* actually zap the local rows */ 1666 /* 1667 Zero the required rows. If the "diagonal block" of the matrix 1668 is square and the user wishes to set the diagonal we use separate 1669 code so that MatSetValues() is not called for each diagonal allocating 1670 new memory, thus calling lots of mallocs and slowing things down. 1671 1672 */ 1673 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1674 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1675 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1676 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1677 } else if (diag != 0.0) { 1678 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1679 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1680 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1681 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1682 } 1683 for (i=0; i<slen; i++) { 1684 row = lrows[i] + rstart_bs; 1685 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1686 } 1687 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 } else { 1690 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1691 } 1692 1693 ierr = PetscFree(lrows);CHKERRQ(ierr); 1694 1695 /* wait on sends */ 1696 if (nsends) { 1697 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1698 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1699 ierr = PetscFree(send_status);CHKERRQ(ierr); 1700 } 1701 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1702 ierr = PetscFree(svalues);CHKERRQ(ierr); 1703 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1709 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1710 { 1711 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatEqual_MPIBAIJ" 1723 PetscErrorCode 
MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1724 { 1725 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1726 Mat a,b,c,d; 1727 PetscTruth flg; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 a = matA->A; b = matA->B; 1732 c = matB->A; d = matB->B; 1733 1734 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1735 if (flg) { 1736 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1737 } 1738 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatCopy_MPIBAIJ" 1744 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1745 { 1746 PetscErrorCode ierr; 1747 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1748 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1749 1750 PetscFunctionBegin; 1751 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1752 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1753 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1754 } else { 1755 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1756 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1757 } 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1763 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1764 { 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #include "petscblaslapack.h" 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1775 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1776 { 1777 PetscErrorCode ierr; 1778 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1779 PetscBLASInt bnz,one=1; 1780 Mat_SeqBAIJ *x,*y; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 x = (Mat_SeqBAIJ *)xx->A->data; 1786 y = (Mat_SeqBAIJ *)yy->A->data; 1787 bnz = PetscBLASIntCast(x->nz); 1788 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1789 x = (Mat_SeqBAIJ *)xx->B->data; 1790 y = (Mat_SeqBAIJ *)yy->B->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 } else { 1794 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1795 } 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1801 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1802 { 1803 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1804 PetscErrorCode ierr; 1805 1806 PetscFunctionBegin; 1807 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1808 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1814 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1815 { 1816 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1821 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1827 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1828 { 1829 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1830 PetscErrorCode ierr; 1831 1832 PetscFunctionBegin; 1833 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1834 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 
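/*
   Illustrative sketch (the names X, Y and alpha are placeholders, not part of this file):
   how a caller can reach the SAME_NONZERO_PATTERN fast path of MatAXPY_MPIBAIJ() above.
   Y is assumed to share X's block nonzero pattern, e.g. because it was obtained with
   MatDuplicate().
*/
#if 0
{
  Mat            X,Y;            /* X: an assembled MPIBAIJ matrix (assumed) */
  PetscScalar    alpha = 2.0;    /* placeholder scaling factor */
  PetscErrorCode ierr;

  ierr = MatDuplicate(X,MAT_COPY_VALUES,&Y);CHKERRQ(ierr);        /* Y inherits X's block nonzero pattern */
  ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);   /* Y += alpha*X; hits the BLASaxpy branch on both the A and B blocks */
  ierr = MatDestroy(Y);CHKERRQ(ierr);
}
#endif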
1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1840 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1841 { 1842 PetscErrorCode ierr; 1843 IS iscol_local; 1844 PetscInt csize; 1845 1846 PetscFunctionBegin; 1847 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1848 if (call == MAT_REUSE_MATRIX) { 1849 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1850 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1851 } else { 1852 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1853 } 1854 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1855 if (call == MAT_INITIAL_MATRIX) { 1856 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1857 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1858 } 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 1864 /* 1865 Not great since it makes two copies of the submatrix: first a local SeqBAIJ, 1866 and then the end result obtained by concatenating the local matrices. 1867 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ(). 1868 */ 1869 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1870 { 1871 PetscErrorCode ierr; 1872 PetscMPIInt rank,size; 1873 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1874 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1875 Mat *local,M,Mreuse; 1876 MatScalar *vwork,*aa; 1877 MPI_Comm comm = ((PetscObject)mat)->comm; 1878 Mat_SeqBAIJ *aij; 1879 1880 1881 PetscFunctionBegin; 1882 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1883 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1884 1885 if (call == MAT_REUSE_MATRIX) { 1886 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1887 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1888 local = &Mreuse; 1889 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1890 } else { 1891 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1892 Mreuse = *local; 1893 ierr = PetscFree(local);CHKERRQ(ierr); 1894 } 1895 1896 /* 1897 m - number of local rows 1898 n - number of columns (same on all processors) 1899 rstart - first row in new global matrix generated 1900 */ 1901 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1902 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1903 m = m/bs; 1904 n = n/bs; 1905 1906 if (call == MAT_INITIAL_MATRIX) { 1907 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1908 ii = aij->i; 1909 jj = aij->j; 1910 1911 /* 1912 Determine the number of non-zeros in the diagonal and off-diagonal 1913 portions of the matrix in order to do correct preallocation 1914 */ 1915 1916 /* first get start and end of "diagonal" columns */ 1917 if (csize == PETSC_DECIDE) { 1918 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1919 if (mglobal == n*bs) { /* square matrix */ 1920 nlocal = m; 1921 } else { 1922 nlocal = n/size + ((n % size) > rank); 1923 } 1924 } else { 1925 nlocal = csize/bs; 1926 } 1927 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1928 rstart = rend - nlocal; 1929 if (rank == size - 1 && rend != n) { 1930 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total
number of columns %D",rend,n); 1931 } 1932 1933 /* next, compute all the lengths */ 1934 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1935 olens = dlens + m; 1936 for (i=0; i<m; i++) { 1937 jend = ii[i+1] - ii[i]; 1938 olen = 0; 1939 dlen = 0; 1940 for (j=0; j<jend; j++) { 1941 if (*jj < rstart || *jj >= rend) olen++; 1942 else dlen++; 1943 jj++; 1944 } 1945 olens[i] = olen; 1946 dlens[i] = dlen; 1947 } 1948 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1949 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1950 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1951 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1952 ierr = PetscFree(dlens);CHKERRQ(ierr); 1953 } else { 1954 PetscInt ml,nl; 1955 1956 M = *newmat; 1957 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1958 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1959 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1960 /* 1961 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1962 rather than the slower MatSetValues(). 1963 */ 1964 M->was_assembled = PETSC_TRUE; 1965 M->assembled = PETSC_FALSE; 1966 } 1967 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1968 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1969 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1970 ii = aij->i; 1971 jj = aij->j; 1972 aa = aij->a; 1973 for (i=0; i<m; i++) { 1974 row = rstart/bs + i; 1975 nz = ii[i+1] - ii[i]; 1976 cwork = jj; jj += nz; 1977 vwork = aa; aa += nz; 1978 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1979 } 1980 1981 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1982 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1983 *newmat = M; 1984 1985 /* save submatrix used in processor for next request */ 1986 if (call == MAT_INITIAL_MATRIX) { 1987 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1988 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1989 } 1990 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "MatPermute_MPIBAIJ" 1996 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1997 { 1998 MPI_Comm comm,pcomm; 1999 PetscInt first,local_size,nrows; 2000 const PetscInt *rows; 2001 PetscMPIInt size; 2002 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2003 PetscErrorCode ierr; 2004 2005 PetscFunctionBegin; 2006 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2007 /* make a collective version of 'rowp' */ 2008 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2009 if (pcomm==comm) { 2010 crowp = rowp; 2011 } else { 2012 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2013 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2014 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2015 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2016 } 2017 /* collect the global row permutation and invert it */ 2018 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2019 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2020 if (pcomm!=comm) { 2021 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2022 } 2023 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2024 /* get the local target indices */ 2025 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2026 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2027 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2028 ierr = 
ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2029 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2030 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2031 /* the column permutation is so much easier; 2032 make a local version of 'colp' and invert it */ 2033 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2034 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2035 if (size==1) { 2036 lcolp = colp; 2037 } else { 2038 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2039 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2040 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2041 } 2042 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2043 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2044 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2045 if (size>1) { 2046 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2047 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2048 } 2049 /* now we just get the submatrix */ 2050 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2051 /* clean up */ 2052 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2053 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2054 PetscFunctionReturn(0); 2055 } 2056 2057 #undef __FUNCT__ 2058 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2059 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2060 { 2061 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2062 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2063 2064 PetscFunctionBegin; 2065 if (nghosts) { *nghosts = B->nbs;} 2066 if (ghosts) {*ghosts = baij->garray;} 2067 PetscFunctionReturn(0); 2068 } 2069 2070 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2074 /* 2075 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2076 */ 2077 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2078 { 2079 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2080 PetscErrorCode ierr; 2081 PetscMPIInt size,*ncolsonproc,*disp,nn; 2082 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2083 const PetscInt *is; 2084 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2085 PetscInt *rowhit,M,cstart,cend,colb; 2086 PetscInt *columnsforrow,l; 2087 IS *isa; 2088 PetscTruth done,flg; 2089 ISLocalToGlobalMapping map = mat->bmapping; 2090 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2091 2092 PetscFunctionBegin; 2093 if (!mat->assembled) { 2094 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2095 } 2096 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2097 2098 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2099 2100 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2101 M = mat->rmap->n/bs; 2102 cstart = mat->cmap->rstart/bs; 2103 cend = mat->cmap->rend/bs; 2104 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2105 c->N = mat->cmap->N/bs; 2106 c->m = mat->rmap->n/bs; 2107 c->rstart = mat->rmap->rstart/bs; 2108 2109 c->ncolors = nis; 2110 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2111 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2112 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2113 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2114 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2115 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2116 2117 /* Allow access to data structures of local part of matrix */ 2118 if (!baij->colmap) { 2119 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2120 } 2121 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2122 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2123 2124 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2125 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2126 2127 for (i=0; i<nis; i++) { 2128 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2129 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2130 c->ncolumns[i] = n; 2131 if (n) { 2132 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2133 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2134 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2135 } else { 2136 c->columns[i] = 0; 2137 } 2138 2139 if (ctype == IS_COLORING_GLOBAL){ 2140 /* Determine the total (parallel) number of columns of this color */ 2141 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2142 ierr = PetscMalloc(2*size*sizeof(PetscInt*),&ncolsonproc);CHKERRQ(ierr); 2143 disp = ncolsonproc + size; 2144 2145 nn = PetscMPIIntCast(n); 2146 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2147 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2148 if (!nctot) { 2149 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2150 } 2151 2152 disp[0] = 0; 2153 for (j=1; j<size; j++) { 2154 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2155 } 2156 2157 /* Get complete list of columns for color on each processor */ 2158 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2159 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2160 ierr = PetscFree(ncolsonproc);CHKERRQ(ierr); 2161 } else if (ctype == IS_COLORING_GHOSTED){ 2162 /* Determine local number of columns of this color on this process, including ghost points */ 2163 nctot = n; 2164 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2165 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2166 } else { 2167 SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2168 } 2169 2170 /* 2171 Mark all rows affect by these columns 2172 */ 2173 /* Temporary option to allow for debugging/testing */ 2174 flg = PETSC_FALSE; 2175 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2176 if (!flg) {/*-----------------------------------------------------------------------------*/ 2177 /* crude, fast version */ 2178 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2179 /* loop over columns*/ 2180 for (j=0; j<nctot; j++) { 2181 if (ctype == IS_COLORING_GHOSTED) { 2182 col = ltog[cols[j]]; 2183 } else { 2184 col = cols[j]; 2185 } 2186 if (col >= cstart && col < cend) { 2187 /* column is in diagonal block of matrix */ 2188 rows = A_cj + A_ci[col-cstart]; 2189 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2190 } else { 2191 #if defined (PETSC_USE_CTABLE) 2192 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr) 2193 colb --; 2194 #else 2195 colb = baij->colmap[col] - 1; 2196 #endif 2197 if (colb == -1) { 2198 m = 0; 2199 } else { 2200 colb = colb/bs; 2201 rows = B_cj + B_ci[colb]; 2202 m = B_ci[colb+1] - B_ci[colb]; 2203 } 2204 } 2205 /* loop over columns marking them in rowhit */ 2206 for (k=0; k<m; k++) { 2207 rowhit[*rows++] = col + 1; 2208 } 2209 } 2210 2211 /* count the number of hits */ 2212 nrows = 0; 2213 for (j=0; j<M; j++) { 2214 if (rowhit[j]) nrows++; 2215 } 2216 c->nrows[i] = nrows; 2217 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2218 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2219 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2220 nrows = 0; 2221 for (j=0; j<M; j++) { 2222 if (rowhit[j]) { 2223 c->rows[i][nrows] = j; 2224 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2225 nrows++; 2226 } 2227 } 2228 } else {/*-------------------------------------------------------------------------------*/ 2229 /* slow version, using rowhit as a linked list */ 2230 PetscInt currentcol,fm,mfm; 2231 rowhit[M] = M; 2232 nrows = 0; 2233 /* loop over columns*/ 2234 for (j=0; j<nctot; j++) { 2235 if (ctype == IS_COLORING_GHOSTED) { 2236 col = ltog[cols[j]]; 2237 } else { 2238 col = cols[j]; 2239 } 2240 if (col >= cstart && col < cend) { 2241 /* column is in diagonal block of matrix */ 2242 rows = A_cj + A_ci[col-cstart]; 2243 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2244 } else { 2245 #if defined (PETSC_USE_CTABLE) 2246 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2247 colb --; 2248 #else 2249 colb = baij->colmap[col] - 1; 2250 #endif 2251 if (colb == -1) { 2252 m = 0; 2253 } else { 2254 colb = colb/bs; 2255 rows = B_cj + B_ci[colb]; 2256 m = B_ci[colb+1] - B_ci[colb]; 2257 } 2258 } 2259 2260 /* loop over columns marking them in rowhit */ 2261 fm = M; /* fm points to first entry in linked list */ 2262 for (k=0; k<m; k++) { 2263 currentcol = *rows++; 2264 /* is it already in the list? 
*/ 2265 do { 2266 mfm = fm; 2267 fm = rowhit[fm]; 2268 } while (fm < currentcol); 2269 /* not in list so add it */ 2270 if (fm != currentcol) { 2271 nrows++; 2272 columnsforrow[currentcol] = col; 2273 /* next three lines insert new entry into linked list */ 2274 rowhit[mfm] = currentcol; 2275 rowhit[currentcol] = fm; 2276 fm = currentcol; 2277 /* fm points to present position in list since we know the columns are sorted */ 2278 } else { 2279 SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2280 } 2281 } 2282 } 2283 c->nrows[i] = nrows; 2284 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2285 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2286 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2287 /* now store the linked list of rows into c->rows[i] */ 2288 nrows = 0; 2289 fm = rowhit[M]; 2290 do { 2291 c->rows[i][nrows] = fm; 2292 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2293 fm = rowhit[fm]; 2294 } while (fm < M); 2295 } /* ---------------------------------------------------------------------------------------*/ 2296 ierr = PetscFree(cols);CHKERRQ(ierr); 2297 } 2298 2299 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2300 /* 2301 vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones 2302 */ 2303 if (ctype == IS_COLORING_GLOBAL) { 2304 PetscInt *garray; 2305 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2306 for (i=0; i<baij->B->cmap->n/bs; i++) { 2307 for (j=0; j<bs; j++) { 2308 garray[i*bs+j] = bs*baij->garray[i]+j; 2309 } 2310 } 2311 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2312 ierr = PetscFree(garray);CHKERRQ(ierr); 2313 CHKMEMQ; 2314 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2315 for (k=0; k<c->ncolors; k++) { 2316 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2317 for (l=0; l<c->nrows[k]; l++) { 2318 col = c->columnsforrow[k][l]; 2319 if (col >= cstart && col < cend) { 2320 /* column is in diagonal block of matrix */ 2321 colb = col - cstart; 2322 } else { 2323 /* column is in "off-processor" part */ 2324 #if defined (PETSC_USE_CTABLE) 2325 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2326 colb --; 2327 #else 2328 colb = baij->colmap[col] - 1; 2329 #endif 2330 colb = colb/bs; 2331 colb += cend - cstart; 2332 } 2333 c->vscaleforrow[k][l] = colb; 2334 } 2335 } 2336 } else if (ctype == IS_COLORING_GHOSTED) { 2337 /* Get gtol mapping */ 2338 PetscInt N = mat->cmap->N, *gtol; 2339 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2340 for (i=0; i<N; i++) gtol[i] = -1; 2341 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2342 2343 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2344 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2345 for (k=0; k<c->ncolors; k++) { 2346 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2347 for (l=0; l<c->nrows[k]; l++) { 2348 col = c->columnsforrow[k][l]; /* global column index */ 2349 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2350 } 2351 } 2352 ierr = PetscFree(gtol);CHKERRQ(ierr); 2353 } 2354 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2355 2356 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2357 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2358 ierr =
MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2359 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2360 CHKMEMQ; 2361 PetscFunctionReturn(0); 2362 } 2363 2364 #undef __FUNCT__ 2365 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2366 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2367 { 2368 Mat B; 2369 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2370 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2371 Mat_SeqAIJ *b; 2372 PetscErrorCode ierr; 2373 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2374 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2375 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2376 2377 PetscFunctionBegin; 2378 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2379 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2380 2381 /* ---------------------------------------------------------------- 2382 Tell every processor the number of nonzeros per row 2383 */ 2384 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2385 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2386 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2387 } 2388 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2389 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2390 displs = recvcounts + size; 2391 for (i=0; i<size; i++) { 2392 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2393 displs[i] = A->rmap->range[i]/bs; 2394 } 2395 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2396 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2397 #else 2398 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2399 #endif 2400 /* --------------------------------------------------------------- 2401 Create the sequential matrix of the same type as the local block diagonal 2402 */ 2403 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2404 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2405 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2406 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2407 b = (Mat_SeqAIJ *)B->data; 2408 2409 /*-------------------------------------------------------------------- 2410 Copy my part of matrix column indices over 2411 */ 2412 sendcount = ad->nz + bd->nz; 2413 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2414 a_jsendbuf = ad->j; 2415 b_jsendbuf = bd->j; 2416 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2417 cnt = 0; 2418 for (i=0; i<n; i++) { 2419 2420 /* put in lower diagonal portion */ 2421 m = bd->i[i+1] - bd->i[i]; 2422 while (m > 0) { 2423 /* is it above diagonal (in bd (compressed) numbering) */ 2424 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2425 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2426 m--; 2427 } 2428 2429 /* put in diagonal portion */ 2430 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2431 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2432 } 2433 2434 /* put in upper diagonal portion */ 2435 while (m-- > 0) { 2436 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2437 } 2438 } 2439 if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz 
%D",sendcount,cnt); 2440 2441 /*-------------------------------------------------------------------- 2442 Gather all column indices to all processors 2443 */ 2444 for (i=0; i<size; i++) { 2445 recvcounts[i] = 0; 2446 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2447 recvcounts[i] += lens[j]; 2448 } 2449 } 2450 displs[0] = 0; 2451 for (i=1; i<size; i++) { 2452 displs[i] = displs[i-1] + recvcounts[i-1]; 2453 } 2454 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2455 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2456 #else 2457 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2458 #endif 2459 /*-------------------------------------------------------------------- 2460 Assemble the matrix into useable form (note numerical values not yet set) 2461 */ 2462 /* set the b->ilen (length of each row) values */ 2463 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2464 /* set the b->i indices */ 2465 b->i[0] = 0; 2466 for (i=1; i<=A->rmap->N/bs; i++) { 2467 b->i[i] = b->i[i-1] + lens[i-1]; 2468 } 2469 ierr = PetscFree(lens);CHKERRQ(ierr); 2470 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2471 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2472 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2473 2474 if (A->symmetric){ 2475 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2476 } else if (A->hermitian) { 2477 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2478 } else if (A->structurally_symmetric) { 2479 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2480 } 2481 *newmat = B; 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatSOR_MPIBAIJ" 2487 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2488 { 2489 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2490 PetscErrorCode ierr; 2491 Vec bb1 = 0; 2492 2493 PetscFunctionBegin; 2494 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2495 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2496 } 2497 2498 if (flag == SOR_APPLY_UPPER) { 2499 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2504 if (flag & SOR_ZERO_INITIAL_GUESS) { 2505 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2506 its--; 2507 } 2508 2509 while (its--) { 2510 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2511 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2512 2513 /* update rhs: bb1 = bb - B*x */ 2514 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2515 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2516 2517 /* local sweep */ 2518 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2519 } 2520 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2521 if (flag & SOR_ZERO_INITIAL_GUESS) { 2522 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2523 its--; 2524 } 2525 while (its--) { 2526 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2527 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2528 2529 /* update rhs: bb1 = bb - B*x */ 2530 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2531 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2532 2533 /* local sweep */ 2534 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2535 } 2536 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2537 if (flag & SOR_ZERO_INITIAL_GUESS) { 2538 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2539 its--; 2540 } 2541 while (its--) { 2542 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2543 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2544 2545 /* update rhs: bb1 = bb - B*x */ 2546 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2547 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2548 2549 /* local sweep */ 2550 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2551 } 2552 } else { 2553 SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2554 } 2555 2556 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2557 PetscFunctionReturn(0); 2558 } 2559 2560 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2561 2562 2563 /* -------------------------------------------------------------------*/ 2564 static struct _MatOps MatOps_Values = { 2565 MatSetValues_MPIBAIJ, 2566 MatGetRow_MPIBAIJ, 2567 MatRestoreRow_MPIBAIJ, 2568 MatMult_MPIBAIJ, 2569 /* 4*/ MatMultAdd_MPIBAIJ, 2570 MatMultTranspose_MPIBAIJ, 2571 MatMultTransposeAdd_MPIBAIJ, 2572 0, 2573 0, 2574 0, 2575 /*10*/ 0, 2576 0, 2577 0, 2578 MatSOR_MPIBAIJ, 2579 MatTranspose_MPIBAIJ, 2580 /*15*/ MatGetInfo_MPIBAIJ, 2581 MatEqual_MPIBAIJ, 2582 MatGetDiagonal_MPIBAIJ, 2583 MatDiagonalScale_MPIBAIJ, 2584 MatNorm_MPIBAIJ, 2585 /*20*/ MatAssemblyBegin_MPIBAIJ, 2586 MatAssemblyEnd_MPIBAIJ, 2587 MatSetOption_MPIBAIJ, 2588 MatZeroEntries_MPIBAIJ, 2589 /*24*/ MatZeroRows_MPIBAIJ, 2590 0, 2591 0, 2592 0, 2593 0, 2594 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2595 0, 2596 0, 2597 0, 2598 0, 2599 /*34*/ MatDuplicate_MPIBAIJ, 2600 0, 2601 0, 2602 0, 2603 0, 2604 /*39*/ MatAXPY_MPIBAIJ, 2605 MatGetSubMatrices_MPIBAIJ, 2606 MatIncreaseOverlap_MPIBAIJ, 2607 MatGetValues_MPIBAIJ, 2608 MatCopy_MPIBAIJ, 2609 /*44*/ 0, 2610 MatScale_MPIBAIJ, 2611 0, 2612 0, 2613 0, 2614 /*49*/ MatSetBlockSize_MPIBAIJ, 2615 0, 2616 0, 2617 0, 2618 0, 2619 /*54*/ MatFDColoringCreate_MPIBAIJ, 2620 0, 2621 MatSetUnfactored_MPIBAIJ, 2622 MatPermute_MPIBAIJ, 2623 MatSetValuesBlocked_MPIBAIJ, 2624 /*59*/ MatGetSubMatrix_MPIBAIJ, 2625 MatDestroy_MPIBAIJ, 2626 MatView_MPIBAIJ, 2627 0, 2628 0, 2629 /*64*/ 0, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*74*/ 0, 2640 MatFDColoringApply_BAIJ, 2641 0, 2642 0, 2643 0, 2644 /*79*/ 0, 2645 0, 2646 0, 2647 0, 2648 MatLoad_MPIBAIJ, 2649 /*84*/ 0, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*89*/ 0, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*94*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*99*/ 0, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /*104*/0, 2670 MatRealPart_MPIBAIJ, 2671 MatImaginaryPart_MPIBAIJ, 2672 0, 2673 0, 2674 /*109*/0, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2680 0, 2681 MatGetGhosts_MPIBAIJ 2682 }; 2683 2684 EXTERN_C_BEGIN 2685 #undef __FUNCT__ 2686 #define 
__FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2687 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2688 { 2689 PetscFunctionBegin; 2690 *a = ((Mat_MPIBAIJ *)A->data)->A; 2691 *iscopy = PETSC_FALSE; 2692 PetscFunctionReturn(0); 2693 } 2694 EXTERN_C_END 2695 2696 EXTERN_C_BEGIN 2697 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2698 EXTERN_C_END 2699 2700 EXTERN_C_BEGIN 2701 #undef __FUNCT__ 2702 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2703 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2704 { 2705 PetscInt m,rstart,cstart,cend; 2706 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2707 const PetscInt *JJ=0; 2708 PetscScalar *values=0; 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 2713 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2714 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2715 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2716 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2717 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2718 m = B->rmap->n/bs; 2719 rstart = B->rmap->rstart/bs; 2720 cstart = B->cmap->rstart/bs; 2721 cend = B->cmap->rend/bs; 2722 2723 if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2724 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2725 o_nnz = d_nnz + m; 2726 for (i=0; i<m; i++) { 2727 nz = ii[i+1] - ii[i]; 2728 if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2729 nz_max = PetscMax(nz_max,nz); 2730 JJ = jj + ii[i]; 2731 for (j=0; j<nz; j++) { 2732 if (*JJ >= cstart) break; 2733 JJ++; 2734 } 2735 d = 0; 2736 for (; j<nz; j++) { 2737 if (*JJ++ >= cend) break; 2738 d++; 2739 } 2740 d_nnz[i] = d; 2741 o_nnz[i] = nz - d; 2742 } 2743 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2744 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2745 2746 values = (PetscScalar*)V; 2747 if (!values) { 2748 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2749 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2750 } 2751 for (i=0; i<m; i++) { 2752 PetscInt row = i + rstart; 2753 PetscInt ncols = ii[i+1] - ii[i]; 2754 const PetscInt *icols = jj + ii[i]; 2755 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2756 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2757 } 2758 2759 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2760 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2761 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 2763 PetscFunctionReturn(0); 2764 } 2765 EXTERN_C_END 2766 2767 #undef __FUNCT__ 2768 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2769 /*@C 2770 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2771 (the default parallel PETSc format). 2772 2773 Collective on MPI_Comm 2774 2775 Input Parameters: 2776 + A - the matrix 2777 . i - the indices into j for the start of each local row (starts with zero) 2778 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2779 - v - optional values in the matrix 2780 2781 Level: developer 2782 2783 .keywords: matrix, aij, compressed row, sparse, parallel 2784 2785 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2786 @*/ 2787 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2788 { 2789 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2790 2791 PetscFunctionBegin; 2792 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2793 if (f) { 2794 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2795 } 2796 PetscFunctionReturn(0); 2797 } 2798 2799 EXTERN_C_BEGIN 2800 #undef __FUNCT__ 2801 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2802 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2803 { 2804 Mat_MPIBAIJ *b; 2805 PetscErrorCode ierr; 2806 PetscInt i, newbs = PetscAbs(bs); 2807 2808 PetscFunctionBegin; 2809 if (bs < 0) { 2810 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2811 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2812 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2813 bs = PetscAbs(bs); 2814 } 2815 if ((d_nnz || o_nnz) && newbs != bs) { 2816 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2817 } 2818 bs = newbs; 2819 2820 2821 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2822 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2823 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2824 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2825 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2826 2827 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2828 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2829 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2830 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2831 2832 if (d_nnz) { 2833 for (i=0; i<B->rmap->n/bs; i++) { 2834 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2835 } 2836 } 2837 if (o_nnz) { 2838 for (i=0; i<B->rmap->n/bs; i++) { 2839 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2840 } 2841 } 2842 2843 b = (Mat_MPIBAIJ*)B->data; 2844 b->bs2 = bs*bs; 2845 b->mbs = B->rmap->n/bs; 2846 b->nbs = B->cmap->n/bs; 2847 b->Mbs = B->rmap->N/bs; 2848 b->Nbs = B->cmap->N/bs; 2849 2850 for (i=0; i<=b->size; i++) { 2851 b->rangebs[i] = B->rmap->range[i]/bs; 2852 } 2853 b->rstartbs = B->rmap->rstart/bs; 2854 b->rendbs = B->rmap->rend/bs; 2855 b->cstartbs = B->cmap->rstart/bs; 2856 b->cendbs = B->cmap->rend/bs; 2857 2858 if (!B->preallocated) { 2859 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2860 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2861 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2862 ierr = 
PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2863 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2864 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2865 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2866 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2867 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2868 } 2869 2870 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2871 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2872 B->preallocated = PETSC_TRUE; 2873 PetscFunctionReturn(0); 2874 } 2875 EXTERN_C_END 2876 2877 EXTERN_C_BEGIN 2878 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2879 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2880 EXTERN_C_END 2881 2882 2883 EXTERN_C_BEGIN 2884 #undef __FUNCT__ 2885 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2886 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2887 { 2888 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2889 PetscErrorCode ierr; 2890 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2891 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2892 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2893 2894 PetscFunctionBegin; 2895 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2896 ii[0] = 0; 2897 CHKMEMQ; 2898 for (i=0; i<M; i++) { 2899 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2900 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2901 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2902 /* remove one from count of matrix has diagonal */ 2903 for (j=id[i]; j<id[i+1]; j++) { 2904 if (jd[j] == i) {ii[i+1]--;break;} 2905 } 2906 CHKMEMQ; 2907 } 2908 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2909 cnt = 0; 2910 for (i=0; i<M; i++) { 2911 for (j=io[i]; j<io[i+1]; j++) { 2912 if (garray[jo[j]] > rstart) break; 2913 jj[cnt++] = garray[jo[j]]; 2914 CHKMEMQ; 2915 } 2916 for (k=id[i]; k<id[i+1]; k++) { 2917 if (jd[k] != i) { 2918 jj[cnt++] = rstart + jd[k]; 2919 CHKMEMQ; 2920 } 2921 } 2922 for (;j<io[i+1]; j++) { 2923 jj[cnt++] = garray[jo[j]]; 2924 CHKMEMQ; 2925 } 2926 } 2927 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2928 PetscFunctionReturn(0); 2929 } 2930 EXTERN_C_END 2931 2932 /*MC 2933 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2934 2935 Options Database Keys: 2936 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2937 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2938 - -mat_use_hash_table <fact> 2939 2940 Level: beginner 2941 2942 .seealso: MatCreateMPIBAIJ 2943 M*/ 2944 2945 EXTERN_C_BEGIN 2946 #undef __FUNCT__ 2947 #define __FUNCT__ "MatCreate_MPIBAIJ" 2948 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2949 { 2950 Mat_MPIBAIJ *b; 2951 PetscErrorCode ierr; 2952 PetscTruth flg; 2953 2954 PetscFunctionBegin; 2955 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2956 B->data = (void*)b; 2957 2958 2959 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2960 B->mapping = 0; 2961 B->assembled = PETSC_FALSE; 2962 2963 B->insertmode = NOT_SET_VALUES; 2964 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2965 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2966 2967 /* build local table of row and column ownerships */ 2968 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2969 2970 /* build cache for off array entries formed */ 2971 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2972 b->donotstash = PETSC_FALSE; 2973 b->colmap = PETSC_NULL; 2974 b->garray = PETSC_NULL; 2975 b->roworiented = PETSC_TRUE; 2976 2977 /* stuff used in block assembly */ 2978 b->barray = 0; 2979 2980 /* stuff used for matrix vector multiply */ 2981 b->lvec = 0; 2982 b->Mvctx = 0; 2983 2984 /* stuff for MatGetRow() */ 2985 b->rowindices = 0; 2986 b->rowvalues = 0; 2987 b->getrowactive = PETSC_FALSE; 2988 2989 /* hash table stuff */ 2990 b->ht = 0; 2991 b->hd = 0; 2992 b->ht_size = 0; 2993 b->ht_flag = PETSC_FALSE; 2994 b->ht_fact = 0; 2995 b->ht_total_ct = 0; 2996 b->ht_insert_ct = 0; 2997 2998 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2999 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3000 if (flg) { 3001 PetscReal fact = 1.39; 3002 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3003 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3004 if (fact <= 1.0) fact = 1.39; 3005 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3006 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3007 } 3008 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3009 3010 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3011 "MatConvert_MPIBAIJ_MPIAdj", 3012 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3013 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3014 "MatStoreValues_MPIBAIJ", 3015 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3016 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3017 "MatRetrieveValues_MPIBAIJ", 3018 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3019 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3020 "MatGetDiagonalBlock_MPIBAIJ", 3021 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3022 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3023 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3024 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3025 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3026 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3027 
MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3028 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3029 "MatDiagonalScaleLocal_MPIBAIJ", 3030 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3031 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3032 "MatSetHashTableFactor_MPIBAIJ", 3033 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3034 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3035 PetscFunctionReturn(0); 3036 } 3037 EXTERN_C_END 3038 3039 /*MC 3040 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3041 3042 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3043 and MATMPIBAIJ otherwise. 3044 3045 Options Database Keys: 3046 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3047 3048 Level: beginner 3049 3050 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3051 M*/ 3052 3053 EXTERN_C_BEGIN 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "MatCreate_BAIJ" 3056 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3057 { 3058 PetscErrorCode ierr; 3059 PetscMPIInt size; 3060 3061 PetscFunctionBegin; 3062 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3063 if (size == 1) { 3064 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3065 } else { 3066 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3067 } 3068 PetscFunctionReturn(0); 3069 } 3070 EXTERN_C_END 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3074 /*@C 3075 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3076 (block compressed row). For good matrix assembly performance 3077 the user should preallocate the matrix storage by setting the parameters 3078 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3079 performance can be increased by more than a factor of 50. 3080 3081 Collective on Mat 3082 3083 Input Parameters: 3084 + A - the matrix 3085 . bs - size of block 3086 . d_nz - number of block nonzeros per block row in diagonal portion of local 3087 submatrix (same for all local rows) 3088 . d_nnz - array containing the number of block nonzeros in the various block rows 3089 in the diagonal portion of the local submatrix (possibly different for each block 3090 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3091 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3092 submatrix (same for all local rows). 3093 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3094 off-diagonal portion of the local submatrix (possibly different for 3095 each block row) or PETSC_NULL. 3096 3097 If the *_nnz parameter is given then the *_nz parameter is ignored 3098 3099 Options Database Keys: 3100 + -mat_block_size - size of the blocks to use 3101 - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix 3102 3103 Notes: 3104 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3105 then it must be used on all processors that share the object for that argument. 3106 3107 Storage Information: 3108 For a square global matrix we define each processor's diagonal portion 3109 to be its local rows and the corresponding columns (a square submatrix); 3110 each processor's off-diagonal portion encompasses the remainder of the 3111 local matrix (a rectangular submatrix).
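
   A typical call sequence, sketched with placeholder names (comm, A, m, n, M, N, bs, d_nz and o_nz
   stand for whatever the caller supplies), is
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve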
3112 3113 The user can specify preallocated storage for the diagonal part of 3114 the local submatrix with either d_nz or d_nnz (not both). Set 3115 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3116 memory allocation. Likewise, specify preallocated storage for the 3117 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3118 3119 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3120 the figure below we depict these three local rows and all columns (0-11). 3121 3122 .vb 3123 0 1 2 3 4 5 6 7 8 9 10 11 3124 ------------------- 3125 row 3 | o o o d d d o o o o o o 3126 row 4 | o o o d d d o o o o o o 3127 row 5 | o o o d d d o o o o o o 3128 ------------------- 3129 .ve 3130 3131 Thus, any entries in the d locations are stored in the d (diagonal) 3132 submatrix, and any entries in the o locations are stored in the 3133 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3134 stored simply in the MATSEQBAIJ format for compressed row storage. 3135 3136 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3137 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3138 In general, for PDE problems in which most nonzeros are near the diagonal, 3139 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3140 or you will get TERRIBLE performance; see the users' manual chapter on 3141 matrices. 3142 3143 You can call MatGetInfo() to get information on how effective the preallocation was; 3144 see, for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3145 You can also run with the option -info and look for messages with the string 3146 malloc in them to see if additional memory allocation was needed. 3147 3148 Level: intermediate 3149 3150 .keywords: matrix, block, aij, compressed row, sparse, parallel 3151 3152 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3153 @*/ 3154 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3155 { 3156 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3157 3158 PetscFunctionBegin; 3159 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3160 if (f) { 3161 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3162 } 3163 PetscFunctionReturn(0); 3164 } 3165 3166 #undef __FUNCT__ 3167 #define __FUNCT__ "MatCreateMPIBAIJ" 3168 /*@C 3169 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3170 (block compressed row). For good matrix assembly performance 3171 the user should preallocate the matrix storage by setting the parameters 3172 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3173 performance can be increased by more than a factor of 50. 3174 3175 Collective on MPI_Comm 3176 3177 Input Parameters: 3178 + comm - MPI communicator 3179 . bs - size of block 3180 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3181 This value should be the same as the local size used in creating the 3182 y vector for the matrix-vector product y = Ax. 3183 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3184 This value should be the same as the local size used in creating the 3185 x vector for the matrix-vector product y = Ax. 3186 .
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3187 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3188 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3189 submatrix (same for all local rows) 3190 . d_nnz - array containing the number of nonzero blocks in the various block rows 3191 in the diagonal portion of the local submatrix (possibly different for each block 3192 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3193 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3194 submatrix (same for all local rows). 3195 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3196 off-diagonal portion of the local submatrix (possibly different for 3197 each block row) or PETSC_NULL. 3198 3199 Output Parameter: 3200 . A - the matrix 3201 3202 Options Database Keys: 3203 + -mat_block_size - size of the blocks to use 3204 - -mat_use_hash_table <fact> - use a hash table during matrix assembly (fact is the hash table size factor) 3205 3206 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3207 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3208 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3209 3210 Notes: 3211 If the *_nnz parameter is given then the *_nz parameter is ignored 3212 3213 A nonzero block is any block that has 1 or more nonzeros in it 3214 3215 The user MUST specify either the local or global matrix dimensions 3216 (possibly both). 3217 3218 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3219 then it must be used on all processors that share the object for that argument. 3220 3221 Storage Information: 3222 For a square global matrix we define each processor's diagonal portion 3223 to be its local rows and the corresponding columns (a square submatrix); 3224 each processor's off-diagonal portion encompasses the remainder of the 3225 local matrix (a rectangular submatrix). 3226 3227 The user can specify preallocated storage for the diagonal part of 3228 the local submatrix with either d_nz or d_nnz (not both). Set 3229 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3230 memory allocation. Likewise, specify preallocated storage for the 3231 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3232 3233 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3234 the figure below we depict these three local rows and all columns (0-11). 3235 3236 .vb 3237 0 1 2 3 4 5 6 7 8 9 10 11 3238 ------------------- 3239 row 3 | o o o d d d o o o o o o 3240 row 4 | o o o d d d o o o o o o 3241 row 5 | o o o d d d o o o o o o 3242 ------------------- 3243 .ve 3244 3245 Thus, any entries in the d locations are stored in the d (diagonal) 3246 submatrix, and any entries in the o locations are stored in the 3247 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3248 stored simply in the MATSEQBAIJ format for compressed row storage. 3249 3250 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3251 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3252 In general, for PDE problems in which most nonzeros are near the diagonal, 3253 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3254 or you will get TERRIBLE performance; see the users' manual chapter on 3255 matrices.
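   As a minimal sketch (the communicator, sizes, block size bs, and preallocation counts
   below are placeholders; substitute values appropriate to your problem), the recommended
   MatCreate()/MatSetType()/MatMPIBAIJSetPreallocation() sequence mentioned above is

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve

   On a communicator with a single process one would instead set the type to MATSEQBAIJ and
   call MatSeqBAIJSetPreallocation(), which is exactly what this routine does internally.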
3256 3257 Level: intermediate 3258 3259 .keywords: matrix, block, aij, compressed row, sparse, parallel 3260 3261 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3262 @*/ 3263 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3264 { 3265 PetscErrorCode ierr; 3266 PetscMPIInt size; 3267 3268 PetscFunctionBegin; 3269 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3270 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3271 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3272 if (size > 1) { 3273 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3274 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3275 } else { 3276 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3277 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3278 } 3279 PetscFunctionReturn(0); 3280 } 3281 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3284 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3285 { 3286 Mat mat; 3287 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3288 PetscErrorCode ierr; 3289 PetscInt len=0; 3290 3291 PetscFunctionBegin; 3292 *newmat = 0; 3293 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3294 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3295 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3296 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3297 3298 mat->factor = matin->factor; 3299 mat->preallocated = PETSC_TRUE; 3300 mat->assembled = PETSC_TRUE; 3301 mat->insertmode = NOT_SET_VALUES; 3302 3303 a = (Mat_MPIBAIJ*)mat->data; 3304 mat->rmap->bs = matin->rmap->bs; 3305 a->bs2 = oldmat->bs2; 3306 a->mbs = oldmat->mbs; 3307 a->nbs = oldmat->nbs; 3308 a->Mbs = oldmat->Mbs; 3309 a->Nbs = oldmat->Nbs; 3310 3311 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3312 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3313 3314 a->size = oldmat->size; 3315 a->rank = oldmat->rank; 3316 a->donotstash = oldmat->donotstash; 3317 a->roworiented = oldmat->roworiented; 3318 a->rowindices = 0; 3319 a->rowvalues = 0; 3320 a->getrowactive = PETSC_FALSE; 3321 a->barray = 0; 3322 a->rstartbs = oldmat->rstartbs; 3323 a->rendbs = oldmat->rendbs; 3324 a->cstartbs = oldmat->cstartbs; 3325 a->cendbs = oldmat->cendbs; 3326 3327 /* hash table stuff */ 3328 a->ht = 0; 3329 a->hd = 0; 3330 a->ht_size = 0; 3331 a->ht_flag = oldmat->ht_flag; 3332 a->ht_fact = oldmat->ht_fact; 3333 a->ht_total_ct = 0; 3334 a->ht_insert_ct = 0; 3335 3336 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3337 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 3338 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3339 if (oldmat->colmap) { 3340 #if defined (PETSC_USE_CTABLE) 3341 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3342 #else 3343 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3344 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3345 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3346 #endif 3347 } else a->colmap = 0; 
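  /* duplicate the list of global block-column indices (garray) used by the off-diagonal block, if the original matrix has one */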
3348 3349 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3350 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3351 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3352 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3353 } else a->garray = 0; 3354 3355 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3356 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3357 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3358 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3359 3360 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3361 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3362 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3363 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3364 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3365 *newmat = mat; 3366 3367 PetscFunctionReturn(0); 3368 } 3369 3370 #include "petscsys.h" 3371 3372 #undef __FUNCT__ 3373 #define __FUNCT__ "MatLoad_MPIBAIJ" 3374 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 3375 { 3376 Mat A; 3377 PetscErrorCode ierr; 3378 int fd; 3379 PetscInt i,nz,j,rstart,rend; 3380 PetscScalar *vals,*buf; 3381 MPI_Comm comm = ((PetscObject)viewer)->comm; 3382 MPI_Status status; 3383 PetscMPIInt rank,size,maxnz; 3384 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3385 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3386 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3387 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3388 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3389 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3390 3391 PetscFunctionBegin; 3392 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3393 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3394 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3395 3396 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3397 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3398 if (!rank) { 3399 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3400 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3401 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3402 } 3403 3404 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3405 M = header[1]; N = header[2]; 3406 3407 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3408 3409 /* 3410 This code adds extra rows to make sure the number of rows is 3411 divisible by the blocksize 3412 */ 3413 Mbs = M/bs; 3414 extra_rows = bs - M + bs*Mbs; 3415 if (extra_rows == bs) extra_rows = 0; 3416 else Mbs++; 3417 if (extra_rows && !rank) { 3418 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3419 } 3420 3421 /* determine ownership of all rows */ 3422 mbs = Mbs/size + ((Mbs % size) > rank); 3423 m = mbs*bs; 3424 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3425 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3426 3427 /* process 0 needs enough room for process with most rows */ 3428 if (!rank) { 3429 mmax = rowners[1]; 3430 for (i=2; i<size; i++) { 3431 mmax = 
PetscMax(mmax,rowners[i]); 3432 } 3433 mmax*=bs; 3434 } else mmax = m; 3435 3436 rowners[0] = 0; 3437 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3438 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3439 rstart = rowners[rank]; 3440 rend = rowners[rank+1]; 3441 3442 /* distribute row lengths to all processors */ 3443 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3444 if (!rank) { 3445 mend = m; 3446 if (size == 1) mend = mend - extra_rows; 3447 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3448 for (j=mend; j<m; j++) locrowlens[j] = 1; 3449 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3450 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3451 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3452 for (j=0; j<m; j++) { 3453 procsnz[0] += locrowlens[j]; 3454 } 3455 for (i=1; i<size; i++) { 3456 mend = browners[i+1] - browners[i]; 3457 if (i == size-1) mend = mend - extra_rows; 3458 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3459 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3460 /* calculate the number of nonzeros on each processor */ 3461 for (j=0; j<browners[i+1]-browners[i]; j++) { 3462 procsnz[i] += rowlengths[j]; 3463 } 3464 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3465 } 3466 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3467 } else { 3468 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3469 } 3470 3471 if (!rank) { 3472 /* determine max buffer needed and allocate it */ 3473 maxnz = procsnz[0]; 3474 for (i=1; i<size; i++) { 3475 maxnz = PetscMax(maxnz,procsnz[i]); 3476 } 3477 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3478 3479 /* read in my part of the matrix column indices */ 3480 nz = procsnz[0]; 3481 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3482 mycols = ibuf; 3483 if (size == 1) nz -= extra_rows; 3484 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3485 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3486 3487 /* read in every ones (except the last) and ship off */ 3488 for (i=1; i<size-1; i++) { 3489 nz = procsnz[i]; 3490 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3491 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3492 } 3493 /* read in the stuff for the last proc */ 3494 if (size != 1) { 3495 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3496 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3497 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3498 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3499 } 3500 ierr = PetscFree(cols);CHKERRQ(ierr); 3501 } else { 3502 /* determine buffer space needed for message */ 3503 nz = 0; 3504 for (i=0; i<m; i++) { 3505 nz += locrowlens[i]; 3506 } 3507 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3508 mycols = ibuf; 3509 /* receive message of column indices*/ 3510 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3511 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3512 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3513 } 3514 3515 /* loop over local rows, determining number of off diagonal entries */ 3516 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3517 ierr = 
PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3518 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3519 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3520 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3521 rowcount = 0; nzcount = 0; 3522 for (i=0; i<mbs; i++) { 3523 dcount = 0; 3524 odcount = 0; 3525 for (j=0; j<bs; j++) { 3526 kmax = locrowlens[rowcount]; 3527 for (k=0; k<kmax; k++) { 3528 tmp = mycols[nzcount++]/bs; 3529 if (!mask[tmp]) { 3530 mask[tmp] = 1; 3531 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3532 else masked1[dcount++] = tmp; 3533 } 3534 } 3535 rowcount++; 3536 } 3537 3538 dlens[i] = dcount; 3539 odlens[i] = odcount; 3540 3541 /* zero out the mask elements we set */ 3542 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3543 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3544 } 3545 3546 /* create our matrix */ 3547 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3548 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3549 ierr = MatSetType(A,type);CHKERRQ(ierr) 3550 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3551 3552 if (!rank) { 3553 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3554 /* read in my part of the matrix numerical values */ 3555 nz = procsnz[0]; 3556 vals = buf; 3557 mycols = ibuf; 3558 if (size == 1) nz -= extra_rows; 3559 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3560 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3561 3562 /* insert into matrix */ 3563 jj = rstart*bs; 3564 for (i=0; i<m; i++) { 3565 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3566 mycols += locrowlens[i]; 3567 vals += locrowlens[i]; 3568 jj++; 3569 } 3570 /* read in other processors (except the last one) and ship out */ 3571 for (i=1; i<size-1; i++) { 3572 nz = procsnz[i]; 3573 vals = buf; 3574 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3575 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3576 } 3577 /* the last proc */ 3578 if (size != 1){ 3579 nz = procsnz[i] - extra_rows; 3580 vals = buf; 3581 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3582 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3583 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3584 } 3585 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3586 } else { 3587 /* receive numeric values */ 3588 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3589 3590 /* receive message of values*/ 3591 vals = buf; 3592 mycols = ibuf; 3593 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3594 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3595 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3596 3597 /* insert into matrix */ 3598 jj = rstart*bs; 3599 for (i=0; i<m; i++) { 3600 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3601 mycols += locrowlens[i]; 3602 vals += locrowlens[i]; 3603 jj++; 3604 } 3605 } 3606 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3607 ierr = PetscFree(buf);CHKERRQ(ierr); 3608 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3609 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3610 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3611 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3612 ierr = 
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3613 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3614 3615 *newmat = A; 3616 PetscFunctionReturn(0); 3617 } 3618 3619 #undef __FUNCT__ 3620 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3621 /*@ 3622 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3623 3624 Input Parameters: 3625 + mat - the matrix 3626 - fact - the factor used to compute the size of the hash table 3627 3628 Collective on Mat 3629 3630 Level: advanced 3631 3632 Notes: 3633 This can also be set by the command line option: -mat_use_hash_table <fact> 3634 3635 .keywords: matrix, hashtable, factor, HT 3636 3637 .seealso: MatSetOption() 3638 @*/ 3639 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3640 { 3641 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3642 3643 PetscFunctionBegin; 3644 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3645 if (f) { 3646 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3647 } 3648 PetscFunctionReturn(0); 3649 } 3650 3651 EXTERN_C_BEGIN 3652 #undef __FUNCT__ 3653 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3654 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3655 { 3656 Mat_MPIBAIJ *baij; 3657 3658 PetscFunctionBegin; 3659 baij = (Mat_MPIBAIJ*)mat->data; 3660 baij->ht_fact = fact; 3661 PetscFunctionReturn(0); 3662 } 3663 EXTERN_C_END 3664 3665 #undef __FUNCT__ 3666 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3667 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3668 { 3669 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3670 PetscFunctionBegin; 3671 *Ad = a->A; 3672 *Ao = a->B; 3673 *colmap = a->garray; 3674 PetscFunctionReturn(0); 3675 } 3676 3677 /* 3678 Special version for direct calls from Fortran (to eliminate two function call overheads) 3679 */ 3680 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3681 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3682 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3683 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3684 #endif 3685 3686 #undef __FUNCT__ 3687 #define __FUNCT__ "matmpibaijsetvaluesblocked" 3688 /*@C 3689 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked() 3690 3691 Collective on Mat 3692 3693 Input Parameters: 3694 + mat - the matrix 3695 . min - number of input rows 3696 . im - input rows 3697 . nin - number of input columns 3698 . in - input columns 3699 . v - numerical values input 3700 - addvin - INSERT_VALUES or ADD_VALUES 3701 3702 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication.
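   For reference, the conventional path that this direct entry point short-circuits is simply
   the standard interface (a C sketch; only two intermediate function calls are saved by
   calling this routine instead):

.vb
   ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
.ve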
3703 3704 Level: advanced 3705 3706 .seealso: MatSetValuesBlocked() 3707 @*/ 3708 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3709 { 3710 /* convert input arguments to C version */ 3711 Mat mat = *matin; 3712 PetscInt m = *min, n = *nin; 3713 InsertMode addv = *addvin; 3714 3715 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3716 const MatScalar *value; 3717 MatScalar *barray=baij->barray; 3718 PetscTruth roworiented = baij->roworiented; 3719 PetscErrorCode ierr; 3720 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3721 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3722 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3723 3724 PetscFunctionBegin; 3725 /* tasks normally handled by MatSetValuesBlocked() */ 3726 if (mat->insertmode == NOT_SET_VALUES) { 3727 mat->insertmode = addv; 3728 } 3729 #if defined(PETSC_USE_DEBUG) 3730 else if (mat->insertmode != addv) { 3731 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3732 } 3733 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3734 #endif 3735 if (mat->assembled) { 3736 mat->was_assembled = PETSC_TRUE; 3737 mat->assembled = PETSC_FALSE; 3738 } 3739 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3740 3741 3742 if(!barray) { 3743 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3744 baij->barray = barray; 3745 } 3746 3747 if (roworiented) { 3748 stepval = (n-1)*bs; 3749 } else { 3750 stepval = (m-1)*bs; 3751 } 3752 for (i=0; i<m; i++) { 3753 if (im[i] < 0) continue; 3754 #if defined(PETSC_USE_DEBUG) 3755 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3756 #endif 3757 if (im[i] >= rstart && im[i] < rend) { 3758 row = im[i] - rstart; 3759 for (j=0; j<n; j++) { 3760 /* If NumCol = 1 then a copy is not required */ 3761 if ((roworiented) && (n == 1)) { 3762 barray = (MatScalar*)v + i*bs2; 3763 } else if((!roworiented) && (m == 1)) { 3764 barray = (MatScalar*)v + j*bs2; 3765 } else { /* Here a copy is required */ 3766 if (roworiented) { 3767 value = v + i*(stepval+bs)*bs + j*bs; 3768 } else { 3769 value = v + j*(stepval+bs)*bs + i*bs; 3770 } 3771 for (ii=0; ii<bs; ii++,value+=stepval) { 3772 for (jj=0; jj<bs; jj++) { 3773 *barray++ = *value++; 3774 } 3775 } 3776 barray -=bs2; 3777 } 3778 3779 if (in[j] >= cstart && in[j] < cend){ 3780 col = in[j] - cstart; 3781 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3782 } 3783 else if (in[j] < 0) continue; 3784 #if defined(PETSC_USE_DEBUG) 3785 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 3786 #endif 3787 else { 3788 if (mat->was_assembled) { 3789 if (!baij->colmap) { 3790 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3791 } 3792 3793 #if defined(PETSC_USE_DEBUG) 3794 #if defined (PETSC_USE_CTABLE) 3795 { PetscInt data; 3796 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3797 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3798 } 3799 #else 3800 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3801 #endif 3802 #endif 3803 #if defined (PETSC_USE_CTABLE) 3804 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3805 col = (col - 1)/bs; 3806 #else 3807 col = (baij->colmap[in[j]] - 1)/bs; 3808 #endif 3809 if (col < 0 && 
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3810 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3811 col = in[j]; 3812 } 3813 } 3814 else col = in[j]; 3815 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3816 } 3817 } 3818 } else { 3819 if (!baij->donotstash) { 3820 if (roworiented) { 3821 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3822 } else { 3823 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3824 } 3825 } 3826 } 3827 } 3828 3829 /* task normally handled by MatSetValuesBlocked() */ 3830 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3831 PetscFunctionReturn(0); 3832 } 3833