#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
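
     Added note (not in the original comment): in the array case,
   colmap[gbcol] = bs*(local block column of gbcol in baij->B) + 1 when
   global block column gbcol appears in the off-diagonal part, and 0
   otherwise; the PETSC_USE_CTABLE branch stores the same value in a
   PetscTable keyed by gbcol+1.  Callers subtract 1 and treat a negative
   result as "column not present" (see MatSetValues_MPIBAIJ below).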
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij =
(Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscTruth roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + i*(stepval+bs)*bs + j*bs; 315 } else { 316 value = v + j*(stepval+bs)*bs + i*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=stepval) { 319 for (jj=0; jj<bs; jj++) { 320 *barray++ = *value++; 321 } 322 } 323 barray -=bs2; 324 } 325 326 if (in[j] >= cstart && in[j] < cend){ 327 col = in[j] - cstart; 328 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 329 } 330 else if (in[j] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 333 #endif 334 else { 335 if (mat->was_assembled) { 336 if (!baij->colmap) { 337 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 338 } 339 340 #if defined(PETSC_USE_DEBUG) 341 #if defined (PETSC_USE_CTABLE) 342 { PetscInt data; 343 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 344 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 345 } 346 #else 347 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 348 #endif 349 #endif 350 #if defined (PETSC_USE_CTABLE) 351 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 352 col = (col - 1)/bs; 353 #else 354 col = (baij->colmap[in[j]] - 1)/bs; 355 #endif 356 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 357 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 358 col = in[j]; 359 } 360 } 361 else col = in[j]; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 } 365 } else { 366 if (!baij->donotstash) { 367 if (roworiented) { 368 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 369 } else { 370 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } 372 } 373 } 374 } 375 PetscFunctionReturn(0); 376 } 377 378 #define HASH_KEY 0.6180339887 379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 385 { 386 
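  /*
     Added commentary (not in the original source): this path assumes the hash
     table built by MatCreateHashTable_MPIBAIJ_Private() already contains every
     block that will be referenced.  For a locally owned entry (row,col) the key
     is (row/bs)*Nbs + (col/bs) + 1, h1 = HASH(size,key,tmp) is a multiplicative
     hash of that key, and the table is probed linearly (wrapping around once)
     until the key is found; a missing key is an error, so no new nonzero blocks
     can be introduced here.
  */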
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs)
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 row = im[i]; 492 v_t = v + i*nbs2; 493 if (row >= rstart && row < rend) { 494 for (j=0; j<n; j++) { 495 col = in[j]; 496 497 /* Look up into the Hash Table */ 498 key = row*Nbs+col+1; 499 h1 = HASH(size,key,tmp); 500 501 idx = h1; 502 #if defined(PETSC_USE_DEBUG) 503 total_ct++; 504 insert_ct++; 505 if (HT[idx] != key) { 506 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 507 if (idx == size) { 508 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == h1) { 510 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 511 } 512 } 513 } 514 #else 515 if (HT[idx] != key) { 516 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 517 if (idx == size) { 518 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 519 if (idx == h1) { 520 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 521 } 522 } 523 } 524 #endif 525 baij_a = HD[idx]; 526 if (roworiented) { 527 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 528 /* value = v + (i*(stepval+bs)+j)*bs; */ 529 value = v_t; 530 v_t += bs; 531 if (addv == ADD_VALUES) { 532 for (ii=0; ii<bs; ii++,value+=stepval) { 533 for (jj=ii; jj<bs2; jj+=bs) { 534 baij_a[jj] += *value++; 535 } 536 } 537 } else { 538 for (ii=0; ii<bs; ii++,value+=stepval) { 539 for (jj=ii; jj<bs2; jj+=bs) { 540 baij_a[jj] = *value++; 541 } 542 } 543 } 544 } else { 545 value = v + j*(stepval+bs)*bs + i*bs; 546 if (addv == ADD_VALUES) { 547 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 548 for (jj=0; jj<bs; jj++) { 549 baij_a[jj] += *value++; 550 } 551 } 552 } else { 553 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 554 for (jj=0; jj<bs; jj++) { 555 baij_a[jj] = *value++; 556 } 557 } 558 } 559 } 560 } 561 } else { 562 if (!baij->donotstash) { 563 if (roworiented) { 564 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 565 } else { 566 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } 568 } 569 } 570 } 571 #if defined(PETSC_USE_DEBUG) 572 baij->ht_total_ct = total_ct; 573 baij->ht_insert_ct = insert_ct; 574 #endif 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatGetValues_MPIBAIJ" 580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 581 { 582 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 583 PetscErrorCode ierr; 584 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 585 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 586 587 PetscFunctionBegin; 588 for (i=0; i<m; i++) { 589 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 590 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 591 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 592 row = idxm[i] - bsrstart; 593 for (j=0; j<n; j++) { 594 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 595 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 596 if (idxn[j] >= bscstart && idxn[j] < bscend){ 597 col = idxn[j] - bscstart; 598 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } else { 600 if (!baij->colmap) { 601 ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 602 } 603 #if defined (PETSC_USE_CTABLE) 604 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 605 data --; 606 #else 607 data = baij->colmap[idxn[j]/bs]-1; 608 #endif 609 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 610 else { 611 col = data + idxn[j]%bs; 612 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 613 } 614 } 615 } 616 } else { 617 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatNorm_MPIBAIJ" 625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 626 { 627 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 628 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 629 PetscErrorCode ierr; 630 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 631 PetscReal sum = 0.0; 632 MatScalar *v; 633 634 PetscFunctionBegin; 635 if (baij->size == 1) { 636 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 637 } else { 638 if (type == NORM_FROBENIUS) { 639 v = amat->a; 640 nz = amat->nz*bs2; 641 for (i=0; i<nz; i++) { 642 #if defined(PETSC_USE_COMPLEX) 643 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 644 #else 645 sum += (*v)*(*v); v++; 646 #endif 647 } 648 v = bmat->a; 649 nz = bmat->nz*bs2; 650 for (i=0; i<nz; i++) { 651 #if defined(PETSC_USE_COMPLEX) 652 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 653 #else 654 sum += (*v)*(*v); v++; 655 #endif 656 } 657 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 658 *nrm = sqrt(*nrm); 659 } else if (type == NORM_1) { /* max column sum */ 660 PetscReal *tmp,*tmp2; 661 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 662 ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 663 tmp2 = tmp + mat->cmap->N; 664 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 665 v = amat->a; jj = amat->j; 666 for (i=0; i<amat->nz; i++) { 667 for (j=0; j<bs; j++){ 668 col = bs*(cstart + *jj) + j; /* column index */ 669 for (row=0; row<bs; row++){ 670 tmp[col] += PetscAbsScalar(*v); v++; 671 } 672 } 673 jj++; 674 } 675 v = bmat->a; jj = bmat->j; 676 for (i=0; i<bmat->nz; i++) { 677 for (j=0; j<bs; j++){ 678 col = bs*garray[*jj] + j; 679 for (row=0; row<bs; row++){ 680 tmp[col] += PetscAbsScalar(*v); v++; 681 } 682 } 683 jj++; 684 } 685 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 686 *nrm = 0.0; 687 for (j=0; j<mat->cmap->N; j++) { 688 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 689 } 690 ierr = PetscFree(tmp);CHKERRQ(ierr); 691 } else if (type == NORM_INFINITY) { /* max row sum */ 692 PetscReal *sums; 693 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 694 sum = 0.0; 695 for (j=0; j<amat->mbs; j++) { 696 for (row=0; row<bs; row++) sums[row] = 0.0; 697 v = amat->a + bs2*amat->i[j]; 698 nz = amat->i[j+1]-amat->i[j]; 699 for (i=0; i<nz; i++) { 700 for (col=0; col<bs; col++){ 701 for (row=0; row<bs; row++){ 702 sums[row] += PetscAbsScalar(*v); v++; 703 } 704 } 705 } 706 v = bmat->a + bs2*bmat->i[j]; 707 nz = bmat->i[j+1]-bmat->i[j]; 708 for (i=0; i<nz; i++) { 709 for (col=0; col<bs; col++){ 710 for (row=0; row<bs; row++){ 711 sums[row] += PetscAbsScalar(*v); v++; 712 } 713 } 714 } 715 for (row=0; row<bs; row++){ 716 if (sums[row] > sum) sum = sums[row]; 717 } 718 } 719 ierr = 
MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD = baij->hd;
  HT = baij->ht;


  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);


  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)?
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 825 #endif 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 831 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 832 { 833 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 834 PetscErrorCode ierr; 835 PetscInt nstash,reallocs; 836 InsertMode addv; 837 838 PetscFunctionBegin; 839 if (baij->donotstash) { 840 PetscFunctionReturn(0); 841 } 842 843 /* make sure all processors are either in INSERTMODE or ADDMODE */ 844 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 845 if (addv == (ADD_VALUES|INSERT_VALUES)) { 846 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 847 } 848 mat->insertmode = addv; /* in case this processor had no cache */ 849 850 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 851 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 852 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 853 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 854 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 855 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 861 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 862 { 863 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 864 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 865 PetscErrorCode ierr; 866 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 867 PetscInt *row,*col; 868 PetscTruth r1,r2,r3,other_disassembled; 869 MatScalar *val; 870 InsertMode addv = mat->insertmode; 871 PetscMPIInt n; 872 873 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 874 PetscFunctionBegin; 875 if (!baij->donotstash) { 876 while (1) { 877 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 878 if (!flg) break; 879 880 for (i=0; i<n;) { 881 /* Now identify the consecutive vals belonging to the same row */ 882 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 883 if (j < n) ncols = j-i; 884 else ncols = n-i; 885 /* Now assemble all these values with a single function call */ 886 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 887 i = j; 888 } 889 } 890 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 891 /* Now process the block-stash. 
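       (These are the bs*bs blocks that other processes contributed through
       MatSetValuesBlocked() and that are received below from mat->bstash.)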
Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format ==
PETSC_VIEWER_ASCII_INFO_DETAIL) { 978 MatInfo info; 979 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 980 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 981 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 982 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 983 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 984 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 985 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 986 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 987 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 988 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 989 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 990 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } else if (format == PETSC_VIEWER_ASCII_INFO) { 993 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 996 PetscFunctionReturn(0); 997 } 998 } 999 1000 if (isdraw) { 1001 PetscDraw draw; 1002 PetscTruth isnull; 1003 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1004 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1005 } 1006 1007 if (size == 1) { 1008 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1009 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1010 } else { 1011 /* assemble the entire matrix onto first processor. */ 1012 Mat A; 1013 Mat_SeqBAIJ *Aloc; 1014 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1015 MatScalar *a; 1016 1017 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1018 /* Perhaps this should be the type of mat? 
*/ 1019 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1020 if (!rank) { 1021 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1022 } else { 1023 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1024 } 1025 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1026 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1027 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1028 1029 /* copy over the A part */ 1030 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1031 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1032 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1033 1034 for (i=0; i<mbs; i++) { 1035 rvals[0] = bs*(baij->rstartbs + i); 1036 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1037 for (j=ai[i]; j<ai[i+1]; j++) { 1038 col = (baij->cstartbs+aj[j])*bs; 1039 for (k=0; k<bs; k++) { 1040 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1041 col++; a += bs; 1042 } 1043 } 1044 } 1045 /* copy over the B part */ 1046 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1047 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1048 for (i=0; i<mbs; i++) { 1049 rvals[0] = bs*(baij->rstartbs + i); 1050 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1051 for (j=ai[i]; j<ai[i+1]; j++) { 1052 col = baij->garray[aj[j]]*bs; 1053 for (k=0; k<bs; k++) { 1054 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1055 col++; a += bs; 1056 } 1057 } 1058 } 1059 ierr = PetscFree(rvals);CHKERRQ(ierr); 1060 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1061 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1062 /* 1063 Everyone has to call to draw the matrix since the graphics waits are 1064 synchronized across all processors that share the PetscDraw object 1065 */ 1066 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1067 if (!rank) { 1068 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1069 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1070 } 1071 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1072 ierr = MatDestroy(A);CHKERRQ(ierr); 1073 } 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatView_MPIBAIJ" 1079 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1080 { 1081 PetscErrorCode ierr; 1082 PetscTruth iascii,isdraw,issocket,isbinary; 1083 1084 PetscFunctionBegin; 1085 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1086 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1087 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1088 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1089 if (iascii || isdraw || issocket || isbinary) { 1090 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1091 } else { 1092 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1093 } 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1099 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1100 { 1101 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 #if defined(PETSC_USE_LOG) 1106 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1107 #endif 1108 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1109 
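  /* the block stash (bstash) holds the bs*bs blocks that were set with
     MatSetValuesBlocked() for off-process rows; it is freed separately
     from the scalar stash */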
ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do
nondiagonal part */ 1187 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1188 if (!merged) { 1189 /* send it on its way */ 1190 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1191 /* do local part */ 1192 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1193 /* receive remote parts: note this assumes the values are not actually */ 1194 /* inserted in yy until the next line */ 1195 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1196 } else { 1197 /* do local part */ 1198 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1199 /* send it on its way */ 1200 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1201 /* values actually were received in the Begin() but we need to call this nop */ 1202 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1209 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1210 { 1211 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1212 PetscErrorCode ierr; 1213 1214 PetscFunctionBegin; 1215 /* do nondiagonal part */ 1216 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1217 /* send it on its way */ 1218 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1219 /* do local part */ 1220 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1221 /* receive remote parts: note this assumes the values are not actually */ 1222 /* inserted in yy until the next line, which is true for my implementation*/ 1223 /* but is not perhaps always true. */ 1224 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /* 1229 This only works correctly for square matrices where the subblock A->A is the 1230 diagonal block 1231 */ 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1234 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1235 { 1236 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1241 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatScale_MPIBAIJ" 1247 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1248 { 1249 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1250 PetscErrorCode ierr; 1251 1252 PetscFunctionBegin; 1253 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1254 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1260 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1263 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264 PetscErrorCode ierr; 1265 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1266 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1267 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1268 1269 PetscFunctionBegin; 1270 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271 mat->getrowactive = PETSC_TRUE; 1272 1273 if (!mat->rowvalues && (idx || v)) { 1274 /* 
1275 allocate enough space to hold information from the longest row. 1276 */ 1277 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1278 PetscInt max = 1,mbs = mat->mbs,tmp; 1279 for (i=0; i<mbs; i++) { 1280 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1281 if (max < tmp) { max = tmp; } 1282 } 1283 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1284 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1285 } 1286 1287 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1288 lrow = row - brstart; 1289 1290 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1291 if (!v) {pvA = 0; pvB = 0;} 1292 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1293 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1294 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1295 nztot = nzA + nzB; 1296 1297 cmap = mat->garray; 1298 if (v || idx) { 1299 if (nztot) { 1300 /* Sort by increasing column numbers, assuming A and B already sorted */ 1301 PetscInt imark = -1; 1302 if (v) { 1303 *v = v_p = mat->rowvalues; 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1306 else break; 1307 } 1308 imark = i; 1309 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1310 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1311 } 1312 if (idx) { 1313 *idx = idx_p = mat->rowindices; 1314 if (imark > -1) { 1315 for (i=0; i<imark; i++) { 1316 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1317 } 1318 } else { 1319 for (i=0; i<nzB; i++) { 1320 if (cmap[cworkB[i]/bs] < cstart) 1321 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1322 else break; 1323 } 1324 imark = i; 1325 } 1326 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1327 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1328 } 1329 } else { 1330 if (idx) *idx = 0; 1331 if (v) *v = 0; 1332 } 1333 } 1334 *nz = nztot; 1335 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1336 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1342 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1343 { 1344 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1345 1346 PetscFunctionBegin; 1347 if (!baij->getrowactive) { 1348 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1349 } 1350 baij->getrowactive = PETSC_FALSE; 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1356 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1357 { 1358 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1363 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1369 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1370 { 1371 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1372 Mat A = a->A,B = a->B; 1373 PetscErrorCode ierr; 1374 PetscReal isend[5],irecv[5]; 1375 1376 PetscFunctionBegin; 1377 info->block_size = (PetscReal)matin->rmap->bs; 1378 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1379 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1380 isend[3] = 
info->memory; isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  }
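  /* out-of-place MAT_REUSE_MATRIX: write the transpose into the caller-supplied matrix */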
else { 1476 B = *matout; 1477 } 1478 1479 /* copy over the A part */ 1480 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1481 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1482 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1483 1484 for (i=0; i<mbs; i++) { 1485 rvals[0] = bs*(baij->rstartbs + i); 1486 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1487 for (j=ai[i]; j<ai[i+1]; j++) { 1488 col = (baij->cstartbs+aj[j])*bs; 1489 for (k=0; k<bs; k++) { 1490 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1491 col++; a += bs; 1492 } 1493 } 1494 } 1495 /* copy over the B part */ 1496 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1497 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1498 for (i=0; i<mbs; i++) { 1499 rvals[0] = bs*(baij->rstartbs + i); 1500 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1501 for (j=ai[i]; j<ai[i+1]; j++) { 1502 col = baij->garray[aj[j]]*bs; 1503 for (k=0; k<bs; k++) { 1504 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1505 col++; a += bs; 1506 } 1507 } 1508 } 1509 ierr = PetscFree(rvals);CHKERRQ(ierr); 1510 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1511 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1512 1513 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1514 *matout = B; 1515 } else { 1516 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } 1520 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1523 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1524 { 1525 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1526 Mat a = baij->A,b = baij->B; 1527 PetscErrorCode ierr; 1528 PetscInt s1,s2,s3; 1529 1530 PetscFunctionBegin; 1531 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1532 if (rr) { 1533 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1534 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1535 /* Overlap communication with computation. 
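       (The forward scatter of rr into baij->lvec is started here; the left
       scaling and the diagonal block are handled while those messages are in
       flight, and the scatter is completed just before the off-diagonal block
       is right-scaled with the gathered values.)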
*/ 1536 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1537 } 1538 if (ll) { 1539 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1540 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1541 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1542 } 1543 /* scale the diagonal block */ 1544 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1545 1546 if (rr) { 1547 /* Do a scatter end and then right scale the off-diagonal block */ 1548 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1550 } 1551 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1557 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1558 { 1559 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1560 PetscErrorCode ierr; 1561 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1562 PetscInt i,*owners = A->rmap->range; 1563 PetscInt *nprocs,j,idx,nsends,row; 1564 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1565 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1566 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1567 MPI_Comm comm = ((PetscObject)A)->comm; 1568 MPI_Request *send_waits,*recv_waits; 1569 MPI_Status recv_status,*send_status; 1570 #if defined(PETSC_DEBUG) 1571 PetscTruth found = PETSC_FALSE; 1572 #endif 1573 1574 PetscFunctionBegin; 1575 /* first count number of contributors to each processor */ 1576 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1577 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1578 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1579 j = 0; 1580 for (i=0; i<N; i++) { 1581 if (lastidx > (idx = rows[i])) j = 0; 1582 lastidx = idx; 1583 for (; j<size; j++) { 1584 if (idx >= owners[j] && idx < owners[j+1]) { 1585 nprocs[2*j]++; 1586 nprocs[2*j+1] = 1; 1587 owner[i] = j; 1588 #if defined(PETSC_DEBUG) 1589 found = PETSC_TRUE; 1590 #endif 1591 break; 1592 } 1593 } 1594 #if defined(PETSC_DEBUG) 1595 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1596 found = PETSC_FALSE; 1597 #endif 1598 } 1599 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1600 1601 /* inform other processors of number of messages and max length*/ 1602 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1603 1604 /* post receives: */ 1605 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1606 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1607 for (i=0; i<nrecvs; i++) { 1608 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1609 } 1610 1611 /* do sends: 1612 1) starts[i] gives the starting index in svalues for stuff going to 1613 the ith processor 1614 */ 1615 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1616 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1617 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1618 starts[0] = 0; 1619 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1620 for (i=0; i<N; i++) { 1621 svalues[starts[owner[i]]++] = rows[i]; 1622 } 1623 1624 starts[0] = 0; 1625 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1626 count = 0; 1627 for 
(i=0; i<size; i++) { 1628 if (nprocs[2*i+1]) { 1629 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1630 } 1631 } 1632 ierr = PetscFree(starts);CHKERRQ(ierr); 1633 1634 base = owners[rank]; 1635 1636 /* wait on receives */ 1637 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1638 source = lens + nrecvs; 1639 count = nrecvs; slen = 0; 1640 while (count) { 1641 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1642 /* unpack receives into our local space */ 1643 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1644 source[imdex] = recv_status.MPI_SOURCE; 1645 lens[imdex] = n; 1646 slen += n; 1647 count--; 1648 } 1649 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1650 1651 /* move the data into the send scatter */ 1652 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1653 count = 0; 1654 for (i=0; i<nrecvs; i++) { 1655 values = rvalues + i*nmax; 1656 for (j=0; j<lens[i]; j++) { 1657 lrows[count++] = values[j] - base; 1658 } 1659 } 1660 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1661 ierr = PetscFree(lens);CHKERRQ(ierr); 1662 ierr = PetscFree(owner);CHKERRQ(ierr); 1663 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1664 1665 /* actually zap the local rows */ 1666 /* 1667 Zero the required rows. If the "diagonal block" of the matrix 1668 is square and the user wishes to set the diagonal we use separate 1669 code so that MatSetValues() is not called for each diagonal allocating 1670 new memory, thus calling lots of mallocs and slowing things down. 1671 1672 */ 1673 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1674 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1675 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1676 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1677 } else if (diag != 0.0) { 1678 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1679 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1680 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1681 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1682 } 1683 for (i=0; i<slen; i++) { 1684 row = lrows[i] + rstart_bs; 1685 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1686 } 1687 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 } else { 1690 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1691 } 1692 1693 ierr = PetscFree(lrows);CHKERRQ(ierr); 1694 1695 /* wait on sends */ 1696 if (nsends) { 1697 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1698 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1699 ierr = PetscFree(send_status);CHKERRQ(ierr); 1700 } 1701 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1702 ierr = PetscFree(svalues);CHKERRQ(ierr); 1703 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1709 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1710 { 1711 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatEqual_MPIBAIJ" 1723 PetscErrorCode 
MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1724 { 1725 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1726 Mat a,b,c,d; 1727 PetscTruth flg; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 a = matA->A; b = matA->B; 1732 c = matB->A; d = matB->B; 1733 1734 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1735 if (flg) { 1736 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1737 } 1738 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatCopy_MPIBAIJ" 1744 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1745 { 1746 PetscErrorCode ierr; 1747 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1748 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1749 1750 PetscFunctionBegin; 1751 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1752 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1753 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1754 } else { 1755 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1756 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1757 } 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1763 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1764 { 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #include "petscblaslapack.h" 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1775 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1776 { 1777 PetscErrorCode ierr; 1778 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1779 PetscBLASInt bnz,one=1; 1780 Mat_SeqBAIJ *x,*y; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 x = (Mat_SeqBAIJ *)xx->A->data; 1786 y = (Mat_SeqBAIJ *)yy->A->data; 1787 bnz = PetscBLASIntCast(x->nz); 1788 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1789 x = (Mat_SeqBAIJ *)xx->B->data; 1790 y = (Mat_SeqBAIJ *)yy->B->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 } else { 1794 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1795 } 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1801 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1802 { 1803 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1804 PetscInt rbs,cbs; 1805 PetscErrorCode ierr; 1806 1807 PetscFunctionBegin; 1808 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1809 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1810 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1811 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1812 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1813 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1819 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1820 { 1821 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1822 PetscErrorCode ierr; 1823 1824 PetscFunctionBegin; 1825 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1826 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1827 PetscFunctionReturn(0); 1828 } 1829 1830 #undef 
__FUNCT__ 1831 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1832 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1833 { 1834 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1835 PetscErrorCode ierr; 1836 1837 PetscFunctionBegin; 1838 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1839 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 #undef __FUNCT__ 1844 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1845 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1846 { 1847 PetscErrorCode ierr; 1848 IS iscol_local; 1849 PetscInt csize; 1850 1851 PetscFunctionBegin; 1852 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1853 if (call == MAT_REUSE_MATRIX) { 1854 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1855 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1856 } else { 1857 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1858 } 1859 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1860 if (call == MAT_INITIAL_MATRIX) { 1861 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1862 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1869 /* 1870 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1871 in local and then by concatenating the local matrices the end result. 1872 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1873 */ 1874 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1875 { 1876 PetscErrorCode ierr; 1877 PetscMPIInt rank,size; 1878 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1879 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1880 Mat *local,M,Mreuse; 1881 MatScalar *vwork,*aa; 1882 MPI_Comm comm = ((PetscObject)mat)->comm; 1883 Mat_SeqBAIJ *aij; 1884 1885 1886 PetscFunctionBegin; 1887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1888 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1889 1890 if (call == MAT_REUSE_MATRIX) { 1891 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1892 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1893 local = &Mreuse; 1894 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1895 } else { 1896 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1897 Mreuse = *local; 1898 ierr = PetscFree(local);CHKERRQ(ierr); 1899 } 1900 1901 /* 1902 m - number of local rows 1903 n - number of columns (same on all processors) 1904 rstart - first row in new global matrix generated 1905 */ 1906 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1907 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1908 m = m/bs; 1909 n = n/bs; 1910 1911 if (call == MAT_INITIAL_MATRIX) { 1912 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1913 ii = aij->i; 1914 jj = aij->j; 1915 1916 /* 1917 Determine the number of non-zeros in the diagonal and off-diagonal 1918 portions of the matrix in order to do correct preallocation 1919 */ 1920 1921 /* first get start and end of "diagonal" columns */ 1922 if (csize == PETSC_DECIDE) { 1923 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1924 if (mglobal == n*bs) { /* square matrix 
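-- give each process as many columns of the new matrix as it has rows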
*/ 1925 nlocal = m; 1926 } else { 1927 nlocal = n/size + ((n % size) > rank); 1928 } 1929 } else { 1930 nlocal = csize/bs; 1931 } 1932 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1933 rstart = rend - nlocal; 1934 if (rank == size - 1 && rend != n) { 1935 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1936 } 1937 1938 /* next, compute all the lengths */ 1939 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1940 olens = dlens + m; 1941 for (i=0; i<m; i++) { 1942 jend = ii[i+1] - ii[i]; 1943 olen = 0; 1944 dlen = 0; 1945 for (j=0; j<jend; j++) { 1946 if (*jj < rstart || *jj >= rend) olen++; 1947 else dlen++; 1948 jj++; 1949 } 1950 olens[i] = olen; 1951 dlens[i] = dlen; 1952 } 1953 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1954 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1955 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1956 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1957 ierr = PetscFree(dlens);CHKERRQ(ierr); 1958 } else { 1959 PetscInt ml,nl; 1960 1961 M = *newmat; 1962 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1963 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1964 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1965 /* 1966 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1967 rather than the slower MatSetValues(). 1968 */ 1969 M->was_assembled = PETSC_TRUE; 1970 M->assembled = PETSC_FALSE; 1971 } 1972 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1973 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1974 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1975 ii = aij->i; 1976 jj = aij->j; 1977 aa = aij->a; 1978 for (i=0; i<m; i++) { 1979 row = rstart/bs + i; 1980 nz = ii[i+1] - ii[i]; 1981 cwork = jj; jj += nz; 1982 vwork = aa; aa += nz; 1983 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1984 } 1985 1986 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1987 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 *newmat = M; 1989 1990 /* save submatrix used in processor for next request */ 1991 if (call == MAT_INITIAL_MATRIX) { 1992 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1993 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1994 } 1995 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatPermute_MPIBAIJ" 2001 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2002 { 2003 MPI_Comm comm,pcomm; 2004 PetscInt first,local_size,nrows; 2005 const PetscInt *rows; 2006 PetscMPIInt size; 2007 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2008 PetscErrorCode ierr; 2009 2010 PetscFunctionBegin; 2011 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2012 /* make a collective version of 'rowp' */ 2013 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2014 if (pcomm==comm) { 2015 crowp = rowp; 2016 } else { 2017 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2018 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2019 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2020 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2021 } 2022 /* collect the global row permutation and invert it */ 2023 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2024 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2025 if (pcomm!=comm) { 2026 ierr = 
ISDestroy(crowp);CHKERRQ(ierr); 2027 } 2028 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2029 /* get the local target indices */ 2030 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2031 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2032 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2033 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2034 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2035 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2036 /* the column permutation is much easier; 2037 make a local version of 'colp' and invert it */ 2038 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2039 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2040 if (size==1) { 2041 lcolp = colp; 2042 } else { 2043 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2044 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2045 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2046 } 2047 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2048 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2049 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2050 if (size>1) { 2051 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2052 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2053 } 2054 /* now we just get the submatrix */ 2055 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2056 /* clean up */ 2057 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2058 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2059 PetscFunctionReturn(0); 2060 } 2061 2062 #undef __FUNCT__ 2063 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2064 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2065 { 2066 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2067 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2068 2069 PetscFunctionBegin; 2070 if (nghosts) { *nghosts = B->nbs;} 2071 if (ghosts) {*ghosts = baij->garray;} 2072 PetscFunctionReturn(0); 2073 } 2074 2075 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2079 /* 2080 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2081 */ 2082 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2083 { 2084 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2085 PetscErrorCode ierr; 2086 PetscMPIInt size,*ncolsonproc,*disp,nn; 2087 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2088 const PetscInt *is; 2089 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2090 PetscInt *rowhit,M,cstart,cend,colb; 2091 PetscInt *columnsforrow,l; 2092 IS *isa; 2093 PetscTruth done,flg; 2094 ISLocalToGlobalMapping map = mat->bmapping; 2095 PetscInt *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2096 2097 PetscFunctionBegin; 2098 if (!mat->assembled) { 2099 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2100 } 2101 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2102 2103 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2104 2105 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2106 M = mat->rmap->n/bs; 2107 cstart = mat->cmap->rstart/bs; 2108 cend = mat->cmap->rend/bs; 2109 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2110 c->N = mat->cmap->N/bs; 2111 c->m = mat->rmap->n/bs; 2112 c->rstart = mat->rmap->rstart/bs; 2113 2114 c->ncolors = nis; 2115 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2116 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2117 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2118 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2119 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2120 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2121 2122 /* Allow access to data structures of local part of matrix */ 2123 if (!baij->colmap) { 2124 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2125 } 2126 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2127 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2128 2129 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2130 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2131 2132 for (i=0; i<nis; i++) { 2133 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2134 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2135 c->ncolumns[i] = n; 2136 if (n) { 2137 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2138 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2139 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2140 } else { 2141 c->columns[i] = 0; 2142 } 2143 2144 if (ctype == IS_COLORING_GLOBAL){ 2145 /* Determine the total (parallel) number of columns of this color */ 2146 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2147 ierr = PetscMalloc(2*size*sizeof(PetscInt*),&ncolsonproc);CHKERRQ(ierr); 2148 disp = ncolsonproc + size; 2149 2150 nn = PetscMPIIntCast(n); 2151 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2152 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2153 if (!nctot) { 2154 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2155 } 2156 2157 disp[0] = 0; 2158 for (j=1; j<size; j++) { 2159 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2160 } 2161 2162 /* Get complete list of columns for color on each processor */ 2163 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2164 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2165 ierr = PetscFree(ncolsonproc);CHKERRQ(ierr); 2166 } else if (ctype == IS_COLORING_GHOSTED){ 2167 /* Determine local number of columns of this color on this process, including ghost points */ 2168 nctot = n; 2169 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2170 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2171 } else { 2172 SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2173 } 2174 2175 /* 2176 Mark all rows affected by these columns 2177 */ 2178 /* Temporary option to allow for debugging/testing */ 2179 flg = PETSC_FALSE; 2180 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2181 if (!flg) {/*-----------------------------------------------------------------------------*/ 2182 /* crude, fast version */ 2183 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2184 /* loop over columns */ 2185 for (j=0; j<nctot; j++) { 2186 if (ctype == IS_COLORING_GHOSTED) { 2187 col = ltog[cols[j]]; 2188 } else { 2189 col = cols[j]; 2190 } 2191 if (col >= cstart && col < cend) { 2192 /* column is in diagonal block of matrix */ 2193 rows = A_cj + A_ci[col-cstart]; 2194 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2195 } else { 2196 #if defined (PETSC_USE_CTABLE) 2197 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2198 colb --; 2199 #else 2200 colb = baij->colmap[col] - 1; 2201 #endif 2202 if (colb == -1) { 2203 m = 0; 2204 } else { 2205 colb = colb/bs; 2206 rows = B_cj + B_ci[colb]; 2207 m = B_ci[colb+1] - B_ci[colb]; 2208 } 2209 } 2210 /* mark the rows of this column in rowhit */ 2211 for (k=0; k<m; k++) { 2212 rowhit[*rows++] = col + 1; 2213 } 2214 } 2215 2216 /* count the number of hits */ 2217 nrows = 0; 2218 for (j=0; j<M; j++) { 2219 if (rowhit[j]) nrows++; 2220 } 2221 c->nrows[i] = nrows; 2222 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2223 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2224 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2225 nrows = 0; 2226 for (j=0; j<M; j++) { 2227 if (rowhit[j]) { 2228 c->rows[i][nrows] = j; 2229 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2230 nrows++; 2231 } 2232 } 2233 } else {/*-------------------------------------------------------------------------------*/ 2234 /* slow version, using rowhit as a linked list */ 2235 PetscInt currentcol,fm,mfm; 2236 rowhit[M] = M; 2237 nrows = 0; 2238 /* loop over columns */ 2239 for (j=0; j<nctot; j++) { 2240 if (ctype == IS_COLORING_GHOSTED) { 2241 col = ltog[cols[j]]; 2242 } else { 2243 col = cols[j]; 2244 } 2245 if (col >= cstart && col < cend) { 2246 /* column is in diagonal block of matrix */ 2247 rows = A_cj + A_ci[col-cstart]; 2248 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2249 } else { 2250 #if defined (PETSC_USE_CTABLE) 2251 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2252 colb --; 2253 #else 2254 colb = baij->colmap[col] - 1; 2255 #endif 2256 if (colb == -1) { 2257 m = 0; 2258 } else { 2259 colb = colb/bs; 2260 rows = B_cj + B_ci[colb]; 2261 m = B_ci[colb+1] - B_ci[colb]; 2262 } 2263 } 2264 2265 /* mark the rows of this column in the rowhit linked list */ 2266 fm = M; /* fm points to first entry in linked list */ 2267 for (k=0; k<m; k++) { 2268 currentcol = *rows++; 2269 /* is it already in the list? 
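Walk the linked list kept in rowhit[], starting from the sentinel slot at index M, until an entry >= currentcol is reached.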
*/ 2270 do { 2271 mfm = fm; 2272 fm = rowhit[fm]; 2273 } while (fm < currentcol); 2274 /* not in list so add it */ 2275 if (fm != currentcol) { 2276 nrows++; 2277 columnsforrow[currentcol] = col; 2278 /* next three lines insert new entry into linked list */ 2279 rowhit[mfm] = currentcol; 2280 rowhit[currentcol] = fm; 2281 fm = currentcol; 2282 /* fm points to present position in list since we know the columns are sorted */ 2283 } else { 2284 SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2285 } 2286 } 2287 } 2288 c->nrows[i] = nrows; 2289 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2290 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2291 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2292 /* now store the linked list of rows into c->rows[i] */ 2293 nrows = 0; 2294 fm = rowhit[M]; 2295 do { 2296 c->rows[i][nrows] = fm; 2297 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2298 fm = rowhit[fm]; 2299 } while (fm < M); 2300 } /* ---------------------------------------------------------------------------------------*/ 2301 ierr = PetscFree(cols);CHKERRQ(ierr); 2302 } 2303 2304 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2305 /* 2306 vscale will contain the "diagonal" on processor scalings followed by the off processor 2307 */ 2308 if (ctype == IS_COLORING_GLOBAL) { 2309 PetscInt *garray; 2310 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2311 for (i=0; i<baij->B->cmap->n/bs; i++) { 2312 for (j=0; j<bs; j++) { 2313 garray[i*bs+j] = bs*baij->garray[i]+j; 2314 } 2315 } 2316 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2317 ierr = PetscFree(garray);CHKERRQ(ierr); 2318 CHKMEMQ; 2319 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2320 for (k=0; k<c->ncolors; k++) { 2321 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2322 for (l=0; l<c->nrows[k]; l++) { 2323 col = c->columnsforrow[k][l]; 2324 if (col >= cstart && col < cend) { 2325 /* column is in diagonal block of matrix */ 2326 colb = col - cstart; 2327 } else { 2328 /* column is in "off-processor" part */ 2329 #if defined (PETSC_USE_CTABLE) 2330 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2331 colb --; 2332 #else 2333 colb = baij->colmap[col] - 1; 2334 #endif 2335 colb = colb/bs; 2336 colb += cend - cstart; 2337 } 2338 c->vscaleforrow[k][l] = colb; 2339 } 2340 } 2341 } else if (ctype == IS_COLORING_GHOSTED) { 2342 /* Get gtol mapping */ 2343 PetscInt N = mat->cmap->N, *gtol; 2344 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2345 for (i=0; i<N; i++) gtol[i] = -1; 2346 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2347 2348 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2349 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2350 for (k=0; k<c->ncolors; k++) { 2351 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2352 for (l=0; l<c->nrows[k]; l++) { 2353 col = c->columnsforrow[k][l]; /* global column index */ 2354 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2355 } 2356 } 2357 ierr = PetscFree(gtol);CHKERRQ(ierr); 2358 } 2359 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2360 2361 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2362 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2363 ierr =
MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2364 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2365 CHKMEMQ; 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2371 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2372 { 2373 Mat B; 2374 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2375 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2376 Mat_SeqAIJ *b; 2377 PetscErrorCode ierr; 2378 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2379 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2380 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2381 2382 PetscFunctionBegin; 2383 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2384 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2385 2386 /* ---------------------------------------------------------------- 2387 Tell every processor the number of nonzeros per row 2388 */ 2389 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2390 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2391 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2392 } 2393 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2394 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2395 displs = recvcounts + size; 2396 for (i=0; i<size; i++) { 2397 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2398 displs[i] = A->rmap->range[i]/bs; 2399 } 2400 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2401 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2402 #else 2403 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2404 #endif 2405 /* --------------------------------------------------------------- 2406 Create the sequential matrix of the same type as the local block diagonal 2407 */ 2408 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2409 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2410 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2411 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2412 b = (Mat_SeqAIJ *)B->data; 2413 2414 /*-------------------------------------------------------------------- 2415 Copy my part of matrix column indices over 2416 */ 2417 sendcount = ad->nz + bd->nz; 2418 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2419 a_jsendbuf = ad->j; 2420 b_jsendbuf = bd->j; 2421 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2422 cnt = 0; 2423 for (i=0; i<n; i++) { 2424 2425 /* put in lower diagonal portion */ 2426 m = bd->i[i+1] - bd->i[i]; 2427 while (m > 0) { 2428 /* is it above diagonal (in bd (compressed) numbering) */ 2429 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2430 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2431 m--; 2432 } 2433 2434 /* put in diagonal portion */ 2435 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2436 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2437 } 2438 2439 /* put in upper diagonal portion */ 2440 while (m-- > 0) { 2441 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2442 } 2443 } 2444 if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz 
%D",sendcount,cnt); 2445 2446 /*-------------------------------------------------------------------- 2447 Gather all column indices to all processors 2448 */ 2449 for (i=0; i<size; i++) { 2450 recvcounts[i] = 0; 2451 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2452 recvcounts[i] += lens[j]; 2453 } 2454 } 2455 displs[0] = 0; 2456 for (i=1; i<size; i++) { 2457 displs[i] = displs[i-1] + recvcounts[i-1]; 2458 } 2459 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2460 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2461 #else 2462 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2463 #endif 2464 /*-------------------------------------------------------------------- 2465 Assemble the matrix into useable form (note numerical values not yet set) 2466 */ 2467 /* set the b->ilen (length of each row) values */ 2468 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2469 /* set the b->i indices */ 2470 b->i[0] = 0; 2471 for (i=1; i<=A->rmap->N/bs; i++) { 2472 b->i[i] = b->i[i-1] + lens[i-1]; 2473 } 2474 ierr = PetscFree(lens);CHKERRQ(ierr); 2475 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2476 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2477 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2478 2479 if (A->symmetric){ 2480 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2481 } else if (A->hermitian) { 2482 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2483 } else if (A->structurally_symmetric) { 2484 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2485 } 2486 *newmat = B; 2487 PetscFunctionReturn(0); 2488 } 2489 2490 #undef __FUNCT__ 2491 #define __FUNCT__ "MatSOR_MPIBAIJ" 2492 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2493 { 2494 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2495 PetscErrorCode ierr; 2496 Vec bb1 = 0; 2497 2498 PetscFunctionBegin; 2499 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2500 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2501 } 2502 2503 if (flag == SOR_APPLY_UPPER) { 2504 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2509 if (flag & SOR_ZERO_INITIAL_GUESS) { 2510 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2511 its--; 2512 } 2513 2514 while (its--) { 2515 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 2518 /* update rhs: bb1 = bb - B*x */ 2519 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2520 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2521 2522 /* local sweep */ 2523 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2524 } 2525 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2526 if (flag & SOR_ZERO_INITIAL_GUESS) { 2527 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2528 its--; 2529 } 2530 while (its--) { 2531 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2532 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2533 2534 /* update rhs: bb1 = bb - B*x */ 2535 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2536 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2537 2538 /* local sweep */ 2539 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2540 } 2541 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2542 if (flag & SOR_ZERO_INITIAL_GUESS) { 2543 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2544 its--; 2545 } 2546 while (its--) { 2547 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2548 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2549 2550 /* update rhs: bb1 = bb - B*x */ 2551 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2552 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2553 2554 /* local sweep */ 2555 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2556 } 2557 } else { 2558 SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2559 } 2560 2561 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2562 PetscFunctionReturn(0); 2563 } 2564 2565 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2566 2567 2568 /* -------------------------------------------------------------------*/ 2569 static struct _MatOps MatOps_Values = { 2570 MatSetValues_MPIBAIJ, 2571 MatGetRow_MPIBAIJ, 2572 MatRestoreRow_MPIBAIJ, 2573 MatMult_MPIBAIJ, 2574 /* 4*/ MatMultAdd_MPIBAIJ, 2575 MatMultTranspose_MPIBAIJ, 2576 MatMultTransposeAdd_MPIBAIJ, 2577 0, 2578 0, 2579 0, 2580 /*10*/ 0, 2581 0, 2582 0, 2583 MatSOR_MPIBAIJ, 2584 MatTranspose_MPIBAIJ, 2585 /*15*/ MatGetInfo_MPIBAIJ, 2586 MatEqual_MPIBAIJ, 2587 MatGetDiagonal_MPIBAIJ, 2588 MatDiagonalScale_MPIBAIJ, 2589 MatNorm_MPIBAIJ, 2590 /*20*/ MatAssemblyBegin_MPIBAIJ, 2591 MatAssemblyEnd_MPIBAIJ, 2592 MatSetOption_MPIBAIJ, 2593 MatZeroEntries_MPIBAIJ, 2594 /*24*/ MatZeroRows_MPIBAIJ, 2595 0, 2596 0, 2597 0, 2598 0, 2599 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2600 0, 2601 0, 2602 0, 2603 0, 2604 /*34*/ MatDuplicate_MPIBAIJ, 2605 0, 2606 0, 2607 0, 2608 0, 2609 /*39*/ MatAXPY_MPIBAIJ, 2610 MatGetSubMatrices_MPIBAIJ, 2611 MatIncreaseOverlap_MPIBAIJ, 2612 MatGetValues_MPIBAIJ, 2613 MatCopy_MPIBAIJ, 2614 /*44*/ 0, 2615 MatScale_MPIBAIJ, 2616 0, 2617 0, 2618 0, 2619 /*49*/ MatSetBlockSize_MPIBAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*54*/ MatFDColoringCreate_MPIBAIJ, 2625 0, 2626 MatSetUnfactored_MPIBAIJ, 2627 MatPermute_MPIBAIJ, 2628 MatSetValuesBlocked_MPIBAIJ, 2629 /*59*/ MatGetSubMatrix_MPIBAIJ, 2630 MatDestroy_MPIBAIJ, 2631 MatView_MPIBAIJ, 2632 0, 2633 0, 2634 /*64*/ 0, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2640 0, 2641 0, 2642 0, 2643 0, 2644 /*74*/ 0, 2645 MatFDColoringApply_BAIJ, 2646 0, 2647 0, 2648 0, 2649 /*79*/ 0, 2650 0, 2651 0, 2652 0, 2653 MatLoad_MPIBAIJ, 2654 /*84*/ 0, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*89*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*94*/ 0, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /*99*/ 0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*104*/0, 2675 MatRealPart_MPIBAIJ, 2676 MatImaginaryPart_MPIBAIJ, 2677 0, 2678 0, 2679 /*109*/0, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2685 0, 2686 MatGetGhosts_MPIBAIJ 2687 }; 2688 2689 EXTERN_C_BEGIN 2690 #undef __FUNCT__ 2691 #define 
__FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2692 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2693 { 2694 PetscFunctionBegin; 2695 *a = ((Mat_MPIBAIJ *)A->data)->A; 2696 *iscopy = PETSC_FALSE; 2697 PetscFunctionReturn(0); 2698 } 2699 EXTERN_C_END 2700 2701 EXTERN_C_BEGIN 2702 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2703 EXTERN_C_END 2704 2705 EXTERN_C_BEGIN 2706 #undef __FUNCT__ 2707 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2708 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2709 { 2710 PetscInt m,rstart,cstart,cend; 2711 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2712 const PetscInt *JJ=0; 2713 PetscScalar *values=0; 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 2718 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2719 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2720 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2721 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2722 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2723 m = B->rmap->n/bs; 2724 rstart = B->rmap->rstart/bs; 2725 cstart = B->cmap->rstart/bs; 2726 cend = B->cmap->rend/bs; 2727 2728 if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2729 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2730 o_nnz = d_nnz + m; 2731 for (i=0; i<m; i++) { 2732 nz = ii[i+1] - ii[i]; 2733 if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2734 nz_max = PetscMax(nz_max,nz); 2735 JJ = jj + ii[i]; 2736 for (j=0; j<nz; j++) { 2737 if (*JJ >= cstart) break; 2738 JJ++; 2739 } 2740 d = 0; 2741 for (; j<nz; j++) { 2742 if (*JJ++ >= cend) break; 2743 d++; 2744 } 2745 d_nnz[i] = d; 2746 o_nnz[i] = nz - d; 2747 } 2748 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2749 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2750 2751 values = (PetscScalar*)V; 2752 if (!values) { 2753 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2754 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2755 } 2756 for (i=0; i<m; i++) { 2757 PetscInt row = i + rstart; 2758 PetscInt ncols = ii[i+1] - ii[i]; 2759 const PetscInt *icols = jj + ii[i]; 2760 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2761 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2762 } 2763 2764 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2765 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2766 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2767 2768 PetscFunctionReturn(0); 2769 } 2770 EXTERN_C_END 2771 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2774 /*@C 2775 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2776 (the default parallel PETSc format). 2777 2778 Collective on MPI_Comm 2779 2780 Input Parameters: 2781 + A - the matrix 2782 . i - the indices into j for the start of each local row (starts with zero) 2783 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2784 - v - optional values in the matrix 2785 2786 Level: developer 2787 2788 .keywords: matrix, aij, compressed row, sparse, parallel 2789 2790 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2791 @*/ 2792 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2793 { 2794 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2795 2796 PetscFunctionBegin; 2797 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2798 if (f) { 2799 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2800 } 2801 PetscFunctionReturn(0); 2802 } 2803 2804 EXTERN_C_BEGIN 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2807 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2808 { 2809 Mat_MPIBAIJ *b; 2810 PetscErrorCode ierr; 2811 PetscInt i, newbs = PetscAbs(bs); 2812 2813 PetscFunctionBegin; 2814 if (bs < 0) { 2815 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2816 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2817 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2818 bs = PetscAbs(bs); 2819 } 2820 if ((d_nnz || o_nnz) && newbs != bs) { 2821 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2822 } 2823 bs = newbs; 2824 2825 2826 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2827 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2828 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2829 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2830 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2831 2832 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2833 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2834 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2835 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2836 2837 if (d_nnz) { 2838 for (i=0; i<B->rmap->n/bs; i++) { 2839 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2840 } 2841 } 2842 if (o_nnz) { 2843 for (i=0; i<B->rmap->n/bs; i++) { 2844 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2845 } 2846 } 2847 2848 b = (Mat_MPIBAIJ*)B->data; 2849 b->bs2 = bs*bs; 2850 b->mbs = B->rmap->n/bs; 2851 b->nbs = B->cmap->n/bs; 2852 b->Mbs = B->rmap->N/bs; 2853 b->Nbs = B->cmap->N/bs; 2854 2855 for (i=0; i<=b->size; i++) { 2856 b->rangebs[i] = B->rmap->range[i]/bs; 2857 } 2858 b->rstartbs = B->rmap->rstart/bs; 2859 b->rendbs = B->rmap->rend/bs; 2860 b->cstartbs = B->cmap->rstart/bs; 2861 b->cendbs = B->cmap->rend/bs; 2862 2863 if (!B->preallocated) { 2864 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2865 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2866 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2867 ierr = 
PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2868 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2869 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2870 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2871 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2872 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2873 } 2874 2875 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2876 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2877 B->preallocated = PETSC_TRUE; 2878 PetscFunctionReturn(0); 2879 } 2880 EXTERN_C_END 2881 2882 EXTERN_C_BEGIN 2883 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2884 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2885 EXTERN_C_END 2886 2887 2888 EXTERN_C_BEGIN 2889 #undef __FUNCT__ 2890 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2891 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2892 { 2893 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2894 PetscErrorCode ierr; 2895 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2896 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2897 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2898 2899 PetscFunctionBegin; 2900 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2901 ii[0] = 0; 2902 CHKMEMQ; 2903 for (i=0; i<M; i++) { 2904 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2905 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2906 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2907 /* remove one from count of matrix has diagonal */ 2908 for (j=id[i]; j<id[i+1]; j++) { 2909 if (jd[j] == i) {ii[i+1]--;break;} 2910 } 2911 CHKMEMQ; 2912 } 2913 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2914 cnt = 0; 2915 for (i=0; i<M; i++) { 2916 for (j=io[i]; j<io[i+1]; j++) { 2917 if (garray[jo[j]] > rstart) break; 2918 jj[cnt++] = garray[jo[j]]; 2919 CHKMEMQ; 2920 } 2921 for (k=id[i]; k<id[i+1]; k++) { 2922 if (jd[k] != i) { 2923 jj[cnt++] = rstart + jd[k]; 2924 CHKMEMQ; 2925 } 2926 } 2927 for (;j<io[i+1]; j++) { 2928 jj[cnt++] = garray[jo[j]]; 2929 CHKMEMQ; 2930 } 2931 } 2932 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2933 PetscFunctionReturn(0); 2934 } 2935 EXTERN_C_END 2936 2937 /*MC 2938 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2939 2940 Options Database Keys: 2941 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2942 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2943 - -mat_use_hash_table <fact> 2944 2945 Level: beginner 2946 2947 .seealso: MatCreateMPIBAIJ 2948 M*/ 2949 2950 EXTERN_C_BEGIN 2951 #undef __FUNCT__ 2952 #define __FUNCT__ "MatCreate_MPIBAIJ" 2953 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2954 { 2955 Mat_MPIBAIJ *b; 2956 PetscErrorCode ierr; 2957 PetscTruth flg; 2958 2959 PetscFunctionBegin; 2960 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2961 B->data = (void*)b; 2962 2963 2964 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2965 B->mapping = 0; 2966 B->assembled = PETSC_FALSE; 2967 2968 B->insertmode = NOT_SET_VALUES; 2969 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2970 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2971 2972 /* build local table of row and column ownerships */ 2973 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2974 2975 /* build cache for off array entries formed */ 2976 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2977 b->donotstash = PETSC_FALSE; 2978 b->colmap = PETSC_NULL; 2979 b->garray = PETSC_NULL; 2980 b->roworiented = PETSC_TRUE; 2981 2982 /* stuff used in block assembly */ 2983 b->barray = 0; 2984 2985 /* stuff used for matrix vector multiply */ 2986 b->lvec = 0; 2987 b->Mvctx = 0; 2988 2989 /* stuff for MatGetRow() */ 2990 b->rowindices = 0; 2991 b->rowvalues = 0; 2992 b->getrowactive = PETSC_FALSE; 2993 2994 /* hash table stuff */ 2995 b->ht = 0; 2996 b->hd = 0; 2997 b->ht_size = 0; 2998 b->ht_flag = PETSC_FALSE; 2999 b->ht_fact = 0; 3000 b->ht_total_ct = 0; 3001 b->ht_insert_ct = 0; 3002 3003 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3004 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3005 if (flg) { 3006 PetscReal fact = 1.39; 3007 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3008 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3009 if (fact <= 1.0) fact = 1.39; 3010 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3011 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3012 } 3013 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3014 3015 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3016 "MatConvert_MPIBAIJ_MPIAdj", 3017 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3018 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3019 "MatStoreValues_MPIBAIJ", 3020 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3021 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3022 "MatRetrieveValues_MPIBAIJ", 3023 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3024 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3025 "MatGetDiagonalBlock_MPIBAIJ", 3026 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3027 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3028 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3029 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3030 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3031 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3032 
MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3033 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3034 "MatDiagonalScaleLocal_MPIBAIJ", 3035 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3036 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3037 "MatSetHashTableFactor_MPIBAIJ", 3038 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3040 PetscFunctionReturn(0); 3041 } 3042 EXTERN_C_END 3043 3044 /*MC 3045 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3046 3047 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3048 and MATMPIBAIJ otherwise. 3049 3050 Options Database Keys: 3051 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3052 3053 Level: beginner 3054 3055 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3056 M*/ 3057 3058 EXTERN_C_BEGIN 3059 #undef __FUNCT__ 3060 #define __FUNCT__ "MatCreate_BAIJ" 3061 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3062 { 3063 PetscErrorCode ierr; 3064 PetscMPIInt size; 3065 3066 PetscFunctionBegin; 3067 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3068 if (size == 1) { 3069 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3070 } else { 3071 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3072 } 3073 PetscFunctionReturn(0); 3074 } 3075 EXTERN_C_END 3076 3077 #undef __FUNCT__ 3078 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3079 /*@C 3080 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3081 (block compressed row). For good matrix assembly performance 3082 the user should preallocate the matrix storage by setting the parameters 3083 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3084 performance can be increased by more than a factor of 50. 3085 3086 Collective on Mat 3087 3088 Input Parameters: 3089 + A - the matrix 3090 . bs - size of blockk 3091 . d_nz - number of block nonzeros per block row in diagonal portion of local 3092 submatrix (same for all local rows) 3093 . d_nnz - array containing the number of block nonzeros in the various block rows 3094 of the in diagonal portion of the local (possibly different for each block 3095 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3096 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3097 submatrix (same for all local rows). 3098 - o_nnz - array containing the number of nonzeros in the various block rows of the 3099 off-diagonal portion of the local submatrix (possibly different for 3100 each block row) or PETSC_NULL. 3101 3102 If the *_nnz parameter is given then the *_nz parameter is ignored 3103 3104 Options Database Keys: 3105 + -mat_block_size - size of the blocks to use 3106 - -mat_use_hash_table <fact> 3107 3108 Notes: 3109 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3110 than it must be used on all processors that share the object for that argument. 3111 3112 Storage Information: 3113 For a square global matrix we define each processor's diagonal portion 3114 to be its local rows and the corresponding columns (a square submatrix); 3115 each processor's off-diagonal portion encompasses the remainder of the 3116 local matrix (a rectangular submatrix). 
   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, check the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local block rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local block rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during matrix assembly, where <fact> sets the hash table size factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of calling this routine directly (see the
   example below).  [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation().]

   Notes:
   If an *_nnz parameter is given, the corresponding *_nz parameter is ignored.

   A nonzero block is any block that has one or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
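
   Example:
   The sketch below illustrates the recommended MatCreate()/MatSetType()/
   MatMPIBAIJSetPreallocation() sequence mentioned above.  It assumes that
   comm, m, n (multiples of the block size), and ierr are declared elsewhere;
   the block size (2) and the per-block-row preallocation counts (3 diagonal,
   2 off-diagonal) are illustrative assumptions only.

.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     /* ... set entries with MatSetValuesBlocked() or MatSetValues() ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve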
3261 3262 Level: intermediate 3263 3264 .keywords: matrix, block, aij, compressed row, sparse, parallel 3265 3266 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3267 @*/ 3268 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3269 { 3270 PetscErrorCode ierr; 3271 PetscMPIInt size; 3272 3273 PetscFunctionBegin; 3274 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3275 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3276 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3277 if (size > 1) { 3278 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3279 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3280 } else { 3281 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3282 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3283 } 3284 PetscFunctionReturn(0); 3285 } 3286 3287 #undef __FUNCT__ 3288 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3289 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3290 { 3291 Mat mat; 3292 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3293 PetscErrorCode ierr; 3294 PetscInt len=0; 3295 3296 PetscFunctionBegin; 3297 *newmat = 0; 3298 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3299 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3300 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3301 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3302 3303 mat->factor = matin->factor; 3304 mat->preallocated = PETSC_TRUE; 3305 mat->assembled = PETSC_TRUE; 3306 mat->insertmode = NOT_SET_VALUES; 3307 3308 a = (Mat_MPIBAIJ*)mat->data; 3309 mat->rmap->bs = matin->rmap->bs; 3310 a->bs2 = oldmat->bs2; 3311 a->mbs = oldmat->mbs; 3312 a->nbs = oldmat->nbs; 3313 a->Mbs = oldmat->Mbs; 3314 a->Nbs = oldmat->Nbs; 3315 3316 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3317 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3318 3319 a->size = oldmat->size; 3320 a->rank = oldmat->rank; 3321 a->donotstash = oldmat->donotstash; 3322 a->roworiented = oldmat->roworiented; 3323 a->rowindices = 0; 3324 a->rowvalues = 0; 3325 a->getrowactive = PETSC_FALSE; 3326 a->barray = 0; 3327 a->rstartbs = oldmat->rstartbs; 3328 a->rendbs = oldmat->rendbs; 3329 a->cstartbs = oldmat->cstartbs; 3330 a->cendbs = oldmat->cendbs; 3331 3332 /* hash table stuff */ 3333 a->ht = 0; 3334 a->hd = 0; 3335 a->ht_size = 0; 3336 a->ht_flag = oldmat->ht_flag; 3337 a->ht_fact = oldmat->ht_fact; 3338 a->ht_total_ct = 0; 3339 a->ht_insert_ct = 0; 3340 3341 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3342 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 3343 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3344 if (oldmat->colmap) { 3345 #if defined (PETSC_USE_CTABLE) 3346 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3347 #else 3348 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3349 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3350 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3351 #endif 3352 } else a->colmap = 0; 
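  /* copy the list of global block-column indices (garray) for the off-diagonal part, if the original matrix has one */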
3353 3354 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3355 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3356 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3357 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3358 } else a->garray = 0; 3359 3360 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3361 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3362 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3363 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3364 3365 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3366 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3367 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3368 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3369 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3370 *newmat = mat; 3371 3372 PetscFunctionReturn(0); 3373 } 3374 3375 #include "petscsys.h" 3376 3377 #undef __FUNCT__ 3378 #define __FUNCT__ "MatLoad_MPIBAIJ" 3379 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 3380 { 3381 Mat A; 3382 PetscErrorCode ierr; 3383 int fd; 3384 PetscInt i,nz,j,rstart,rend; 3385 PetscScalar *vals,*buf; 3386 MPI_Comm comm = ((PetscObject)viewer)->comm; 3387 MPI_Status status; 3388 PetscMPIInt rank,size,maxnz; 3389 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3390 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3391 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3392 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3393 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3394 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3395 3396 PetscFunctionBegin; 3397 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3398 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3399 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3400 3401 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3402 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3403 if (!rank) { 3404 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3405 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3406 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3407 } 3408 3409 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3410 M = header[1]; N = header[2]; 3411 3412 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3413 3414 /* 3415 This code adds extra rows to make sure the number of rows is 3416 divisible by the blocksize 3417 */ 3418 Mbs = M/bs; 3419 extra_rows = bs - M + bs*Mbs; 3420 if (extra_rows == bs) extra_rows = 0; 3421 else Mbs++; 3422 if (extra_rows && !rank) { 3423 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3424 } 3425 3426 /* determine ownership of all rows */ 3427 mbs = Mbs/size + ((Mbs % size) > rank); 3428 m = mbs*bs; 3429 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3430 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3431 3432 /* process 0 needs enough room for process with most rows */ 3433 if (!rank) { 3434 mmax = rowners[1]; 3435 for (i=2; i<size; i++) { 3436 mmax = 
PetscMax(mmax,rowners[i]); 3437 } 3438 mmax*=bs; 3439 } else mmax = m; 3440 3441 rowners[0] = 0; 3442 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3443 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3444 rstart = rowners[rank]; 3445 rend = rowners[rank+1]; 3446 3447 /* distribute row lengths to all processors */ 3448 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3449 if (!rank) { 3450 mend = m; 3451 if (size == 1) mend = mend - extra_rows; 3452 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3453 for (j=mend; j<m; j++) locrowlens[j] = 1; 3454 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3455 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3456 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3457 for (j=0; j<m; j++) { 3458 procsnz[0] += locrowlens[j]; 3459 } 3460 for (i=1; i<size; i++) { 3461 mend = browners[i+1] - browners[i]; 3462 if (i == size-1) mend = mend - extra_rows; 3463 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3464 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3465 /* calculate the number of nonzeros on each processor */ 3466 for (j=0; j<browners[i+1]-browners[i]; j++) { 3467 procsnz[i] += rowlengths[j]; 3468 } 3469 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3470 } 3471 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3472 } else { 3473 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3474 } 3475 3476 if (!rank) { 3477 /* determine max buffer needed and allocate it */ 3478 maxnz = procsnz[0]; 3479 for (i=1; i<size; i++) { 3480 maxnz = PetscMax(maxnz,procsnz[i]); 3481 } 3482 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3483 3484 /* read in my part of the matrix column indices */ 3485 nz = procsnz[0]; 3486 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3487 mycols = ibuf; 3488 if (size == 1) nz -= extra_rows; 3489 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3490 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3491 3492 /* read in every ones (except the last) and ship off */ 3493 for (i=1; i<size-1; i++) { 3494 nz = procsnz[i]; 3495 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3496 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3497 } 3498 /* read in the stuff for the last proc */ 3499 if (size != 1) { 3500 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3501 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3502 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3503 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3504 } 3505 ierr = PetscFree(cols);CHKERRQ(ierr); 3506 } else { 3507 /* determine buffer space needed for message */ 3508 nz = 0; 3509 for (i=0; i<m; i++) { 3510 nz += locrowlens[i]; 3511 } 3512 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3513 mycols = ibuf; 3514 /* receive message of column indices*/ 3515 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3516 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3517 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3518 } 3519 3520 /* loop over local rows, determining number of off diagonal entries */ 3521 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3522 ierr = 
         PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values for the other processes (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* allocate buffer for the numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - the hash table size factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(); the code is duplicated rather than reused.
3708 3709 Level: advanced 3710 3711 .seealso: MatSetValuesBlocked() 3712 @*/ 3713 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3714 { 3715 /* convert input arguments to C version */ 3716 Mat mat = *matin; 3717 PetscInt m = *min, n = *nin; 3718 InsertMode addv = *addvin; 3719 3720 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3721 const MatScalar *value; 3722 MatScalar *barray=baij->barray; 3723 PetscTruth roworiented = baij->roworiented; 3724 PetscErrorCode ierr; 3725 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3726 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3727 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3728 3729 PetscFunctionBegin; 3730 /* tasks normally handled by MatSetValuesBlocked() */ 3731 if (mat->insertmode == NOT_SET_VALUES) { 3732 mat->insertmode = addv; 3733 } 3734 #if defined(PETSC_USE_DEBUG) 3735 else if (mat->insertmode != addv) { 3736 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3737 } 3738 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3739 #endif 3740 if (mat->assembled) { 3741 mat->was_assembled = PETSC_TRUE; 3742 mat->assembled = PETSC_FALSE; 3743 } 3744 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3745 3746 3747 if(!barray) { 3748 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3749 baij->barray = barray; 3750 } 3751 3752 if (roworiented) { 3753 stepval = (n-1)*bs; 3754 } else { 3755 stepval = (m-1)*bs; 3756 } 3757 for (i=0; i<m; i++) { 3758 if (im[i] < 0) continue; 3759 #if defined(PETSC_USE_DEBUG) 3760 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3761 #endif 3762 if (im[i] >= rstart && im[i] < rend) { 3763 row = im[i] - rstart; 3764 for (j=0; j<n; j++) { 3765 /* If NumCol = 1 then a copy is not required */ 3766 if ((roworiented) && (n == 1)) { 3767 barray = (MatScalar*)v + i*bs2; 3768 } else if((!roworiented) && (m == 1)) { 3769 barray = (MatScalar*)v + j*bs2; 3770 } else { /* Here a copy is required */ 3771 if (roworiented) { 3772 value = v + i*(stepval+bs)*bs + j*bs; 3773 } else { 3774 value = v + j*(stepval+bs)*bs + i*bs; 3775 } 3776 for (ii=0; ii<bs; ii++,value+=stepval) { 3777 for (jj=0; jj<bs; jj++) { 3778 *barray++ = *value++; 3779 } 3780 } 3781 barray -=bs2; 3782 } 3783 3784 if (in[j] >= cstart && in[j] < cend){ 3785 col = in[j] - cstart; 3786 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3787 } 3788 else if (in[j] < 0) continue; 3789 #if defined(PETSC_USE_DEBUG) 3790 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 3791 #endif 3792 else { 3793 if (mat->was_assembled) { 3794 if (!baij->colmap) { 3795 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3796 } 3797 3798 #if defined(PETSC_USE_DEBUG) 3799 #if defined (PETSC_USE_CTABLE) 3800 { PetscInt data; 3801 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3802 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3803 } 3804 #else 3805 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 3806 #endif 3807 #endif 3808 #if defined (PETSC_USE_CTABLE) 3809 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3810 col = (col - 1)/bs; 3811 #else 3812 col = (baij->colmap[in[j]] - 1)/bs; 3813 #endif 3814 if (col < 0 && 
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3815 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3816 col = in[j]; 3817 } 3818 } 3819 else col = in[j]; 3820 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3821 } 3822 } 3823 } else { 3824 if (!baij->donotstash) { 3825 if (roworiented) { 3826 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3827 } else { 3828 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3829 } 3830 } 3831 } 3832 } 3833 3834 /* task normally handled by MatSetValuesBlocked() */ 3835 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3836 PetscFunctionReturn(0); 3837 } 3838
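
/*
   Usage sketch (illustrative only, not part of the library source): obtaining
   the local diagonal part, the local off-diagonal part, and the column map of
   an assembled MPIBAIJ matrix with MatMPIBAIJGetSeqBAIJ().  The matrix A and
   the error code ierr are assumed to be declared and the matrix assembled
   elsewhere.

     Mat       Ad,Ao;
     PetscInt *garray;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Ad and Ao are the local MATSEQBAIJ pieces; garray[i] gives the global block
   column index corresponding to local block column i of Ao.
*/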