#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);

/*
   UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise the ..._MatScalar() variants take matrix elements in single precision
   and insert them into the single-precision data structures. The function
   MatSetValuesBlocked_MPIBAIJ() converts the entries to single precision and then calls
   ..._MatScalar() to put them into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar    MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar           MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar    MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar        MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  int            i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* row maxima of the diagonal block go directly into v */
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  /* row maxima of the off-diagonal block; keep whichever has larger magnitude */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
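/*
   Illustrative usage sketch (not part of the library, kept inside #if 0 so it
   does not affect compilation): how a caller might obtain per-row maxima of
   an assembled MPIBAIJ matrix. The helper name DemoRowMax is hypothetical.
*/
#if 0
static PetscErrorCode DemoRowMax(Mat A)
{
  PetscErrorCode ierr;
  Vec            rowmax;
  int            m;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecCreateMPI(A->comm,m,PETSC_DETERMINE,&rowmax);CHKERRQ(ierr);
  ierr = MatGetRowMax(A,rowmax);CHKERRQ(ierr); /* rowmax[i] = largest-magnitude entry in local row i */
  ierr = VecDestroy(rowmax);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif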
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int            nbs = B->nbs,i,bs = B->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
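/*
   A minimal sketch of the colmap encoding (illustration only, plain-array
   branch, kept inside #if 0): entries are stored shifted by one so that a
   stored value of 0 means "this global block column has no local copy";
   a lookup therefore subtracts 1 and a negative result signals absence.
*/
#if 0
static int DemoColmapLookup(const int *colmap,int global_bcol)
{
  int loc = colmap[global_bcol] - 1; /* loc < 0 => block column not in off-diagonal part */
  return loc;
}
#endif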
#define CHUNKSIZE  10

/*
   Inserts value at point (row,col) of the "diagonal" (A) part: binary-search
   the sorted block row for block column col/bs, update in place if present,
   otherwise make room (reallocating all of the matrix storage if the row is full).
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr);  \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}
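/*
   A minimal standalone sketch (illustration only, kept inside #if 0) of the
   search-and-insert pattern implemented by MatSetValues_SeqBAIJ_A_Private()
   above and its twin for the off-diagonal part below: scalar entries, no
   reallocation. The real macros operate on bs*bs blocks and grow storage by
   CHUNKSIZE block slots when a row is full. memmove needs <string.h> if enabled.
*/
#if 0
static void DemoRowInsert(int *cols,double *vals,int *nrow,int bcol,double v,int add)
{
  int low = 0,high = *nrow,t,i;

  while (high-low > 3) {                 /* coarse binary search */
    t = (low+high)/2;
    if (cols[t] > bcol) high = t;
    else                low  = t;
  }
  for (i=low; i<high; i++) {             /* finish with a linear scan */
    if (cols[i] > bcol) break;
    if (cols[i] == bcol) {               /* already present: update in place */
      if (add) vals[i] += v;
      else     vals[i]  = v;
      return;
    }
  }
  memmove(cols+i+1,cols+i,(*nrow-i)*sizeof(int));    /* shift the tail up */
  memmove(vals+i+1,vals+i,(*nrow-i)*sizeof(double));
  cols[i] = bcol; vals[i] = v; (*nrow)++;
}
#endif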
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr);  \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  /* grow the conversion buffer if needed, then copy the values to MatScalar precision */
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int            i,j,row,col;
  int            rstart_orig=baij->rstart_bs;
  int            rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int            cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required by the macros */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash it for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
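/*
   A minimal sketch (illustration only, kept inside #if 0) of the ownership
   test the routine above applies to every (row,col) pair: locally owned rows
   go into the diagonal (A) or off-diagonal (B) block, anything else is
   stashed for the owning process.
*/
#if 0
/* returns 0: diagonal block, 1: off-diagonal block, 2: stash for another process */
static int DemoClassify(int row,int col,int rstart,int rend,int cstart,int cend)
{
  if (row < rstart || row >= rend) return 2;
  if (col >= cstart && col < cend) return 0;
  return 1;
}
#endif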
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  int             i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous, so a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
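/*
   A minimal sketch of the multiplicative hash above (illustration only, kept
   inside #if 0): the key is scaled by the golden-ratio fraction HASH_KEY, the
   integer part is discarded, and the remaining fraction is scaled to a slot
   index in [0,size).
*/
#if 0
static int DemoHash(int size,int key)
{
  double tmp = key*0.6180339887;        /* HASH_KEY */
  return (int)(size*(tmp - (int)tmp));  /* fractional part picks the slot */
}
#endif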
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int            i,j,row,col;
  int            rstart_orig=baij->rstart_bs;
  int            rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int            h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int            total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up the block (row/bs, col/bs) in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry was found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  int             i,j,ii,jj,row,col;
  int             rstart=baij->rstart;
  int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up the block (row,col) in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int            bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int            i,bs2=baij->bs2;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
   Creates and populates the hash table.
   The table is created only once; if new entries need to be added to the
   matrix, then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int            i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  int            size,bs2=baij->bs2,rstart=baij->rstart;
  int            cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int            *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_BOPT_g)
  int            ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (int)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate memory for the hash table */
  ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop over A: linear-probe from the hashed slot to the first free slot */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f, max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
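/*
   A minimal standalone sketch (illustration only, kept inside #if 0) of the
   open-addressing insertion used above: hash the key, then probe forward
   with wraparound until a free slot is found. Keys are stored shifted by +1
   so that a slot holding 0 means "empty".
*/
#if 0
static int DemoProbeInsert(int *table,int size,int h1,int key)
{
  int k;
  for (k=0; k<size; k++) {
    if (!table[(h1+k)%size]) { table[(h1+k)%size] = key; return (h1+k)%size; }
  }
  return -1; /* table is full */
}
#endif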
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int            i,j,rstart,ncols,n,flg,bs2=baij->bs2;
  int            *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed
       column-oriented, set the row-oriented flags to PETSC_FALSE, and after
       MatSetValues() restore the original flags. */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  int               bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
                                                rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
                                                baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so MPIBAIJ is assumed acceptable */
    /* Perhaps this should instead be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  int            nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  /* begin communicating the ghost values of xx, multiply by the diagonal
     block while the scatter proceeds, then add in the off-diagonal block */
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
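/*
   A minimal standalone sketch (illustration only, kept inside #if 0) of how
   one local row of y = A*x is formed from the two sequential blocks in the
   multiply routines above: the diagonal block uses owned entries of x, the
   off-diagonal block uses ghost values gathered into lvec by the scatter.
*/
#if 0
static double DemoRowProduct(int nzA,const int *colsA,const double *valsA,const double *x_owned,
                             int nzB,const int *colsB,const double *valsB,const double *lvec)
{
  double sum = 0.0;
  int    k;
  for (k=0; k<nzA; k++) sum += valsA[k]*x_owned[colsA[k]]; /* diagonal block */
  for (k=0; k<nzB; k++) sum += valsB[k]*lvec[colsB[k]];    /* off-diagonal block, compressed columns */
  return sum;
}
#endif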
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the non-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive the remote parts: note this assumes the values are not actually
     inserted in yy until the VecScatterEnd() below, which is true for this
     implementation but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the non-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive the remote parts: note this assumes the values are not actually
     inserted in zz until the VecScatterEnd() below, which is true for this
     implementation but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int            bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int            *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B are already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (PetscReal)A->M;
  info->columns_global    = (PetscReal)A->N;
  info->rows_local        = (PetscReal)A->m;
  info->columns_local     = (PetscReal)A->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1603 { 1604 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1605 Mat_SeqBAIJ *Aloc; 1606 Mat B; 1607 PetscErrorCode ierr; 1608 int M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1609 int bs=baij->bs,mbs=baij->mbs; 1610 MatScalar *a; 1611 1612 PetscFunctionBegin; 1613 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1614 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1615 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1616 ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1617 1618 /* copy over the A part */ 1619 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1620 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1621 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1622 1623 for (i=0; i<mbs; i++) { 1624 rvals[0] = bs*(baij->rstart + i); 1625 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1626 for (j=ai[i]; j<ai[i+1]; j++) { 1627 col = (baij->cstart+aj[j])*bs; 1628 for (k=0; k<bs; k++) { 1629 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1630 col++; a += bs; 1631 } 1632 } 1633 } 1634 /* copy over the B part */ 1635 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1636 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1637 for (i=0; i<mbs; i++) { 1638 rvals[0] = bs*(baij->rstart + i); 1639 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1640 for (j=ai[i]; j<ai[i+1]; j++) { 1641 col = baij->garray[aj[j]]*bs; 1642 for (k=0; k<bs; k++) { 1643 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1644 col++; a += bs; 1645 } 1646 } 1647 } 1648 ierr = PetscFree(rvals);CHKERRQ(ierr); 1649 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1650 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1651 1652 if (matout) { 1653 *matout = B; 1654 } else { 1655 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1656 } 1657 PetscFunctionReturn(0); 1658 } 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1662 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1663 { 1664 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1665 Mat a = baij->A,b = baij->B; 1666 PetscErrorCode ierr; 1667 int s1,s2,s3; 1668 1669 PetscFunctionBegin; 1670 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1671 if (rr) { 1672 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1673 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1674 /* Overlap communication with computation. 
*/ 1675 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1676 } 1677 if (ll) { 1678 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1679 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1680 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1681 } 1682 /* scale the diagonal block */ 1683 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1684 1685 if (rr) { 1686 /* Do a scatter end and then right scale the off-diagonal block */ 1687 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1688 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1689 } 1690 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1696 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1697 { 1698 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1699 PetscErrorCode ierr; 1700 int i,N,*rows,*owners = l->rowners,size = l->size; 1701 int *nprocs,j,idx,nsends,row; 1702 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1703 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1704 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1705 MPI_Comm comm = A->comm; 1706 MPI_Request *send_waits,*recv_waits; 1707 MPI_Status recv_status,*send_status; 1708 IS istmp; 1709 PetscTruth found; 1710 1711 PetscFunctionBegin; 1712 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1713 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1714 1715 /* first count number of contributors to each processor */ 1716 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1717 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1718 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1719 for (i=0; i<N; i++) { 1720 idx = rows[i]; 1721 found = PETSC_FALSE; 1722 for (j=0; j<size; j++) { 1723 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1724 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1725 } 1726 } 1727 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1728 } 1729 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1730 1731 /* inform other processors of number of messages and max length*/ 1732 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1733 1734 /* post receives: */ 1735 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1736 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1737 for (i=0; i<nrecvs; i++) { 1738 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1739 } 1740 1741 /* do sends: 1742 1) starts[i] gives the starting index in svalues for stuff going to 1743 the ith processor 1744 */ 1745 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1746 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1747 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1748 starts[0] = 0; 1749 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1750 for (i=0; i<N; i++) { 1751 svalues[starts[owner[i]]++] = rows[i]; 1752 } 1753 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1754 1755 starts[0] = 0; 1756 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1757 count = 0; 1758 for (i=0; i<size; i++) { 1759 if (nprocs[2*i+1]) { 1760 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1761 } 
1762   }
1763   ierr = PetscFree(starts);CHKERRQ(ierr);
1764
1765   base = owners[rank]*bs;
1766
1767   /* wait on receives */
1768   ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1769   source = lens + nrecvs;
1770   count  = nrecvs; slen = 0;
1771   while (count) {
1772     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1773     /* unpack receives into our local space */
1774     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1775     source[imdex] = recv_status.MPI_SOURCE;
1776     lens[imdex]   = n;
1777     slen += n;
1778     count--;
1779   }
1780   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1781
1782   /* move the data into the send scatter */
1783   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1784   count = 0;
1785   for (i=0; i<nrecvs; i++) {
1786     values = rvalues + i*nmax;
1787     for (j=0; j<lens[i]; j++) {
1788       lrows[count++] = values[j] - base;
1789     }
1790   }
1791   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1792   ierr = PetscFree(lens);CHKERRQ(ierr);
1793   ierr = PetscFree(owner);CHKERRQ(ierr);
1794   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1795
1796   /* actually zap the local rows */
1797   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1798   PetscLogObjectParent(A,istmp);
1799
1800   /*
1801     Zero the required rows. If the "diagonal block" of the matrix
1802     is square and the user wishes to set the diagonal, we use separate
1803     code so that MatSetValues() is not called for each diagonal entry,
1804     which would allocate new memory, triggering many mallocs and slowing things down.
1805
1806     Contributed by: Matthew Knepley
1807   */
1808   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1809   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1810   if (diag && (l->A->M == l->A->N)) {
1811     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1812   } else if (diag) {
1813     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1814     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1815       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1816 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1817     }
1818     for (i=0; i<slen; i++) {
1819       row  = lrows[i] + rstart_bs;
1820       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1821     }
1822     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1823     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1824   } else {
1825     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1826   }
1827
1828   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1829   ierr = PetscFree(lrows);CHKERRQ(ierr);
1830
1831   /* wait on sends */
1832   if (nsends) {
1833     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1834     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1835     ierr = PetscFree(send_status);CHKERRQ(ierr);
1836   }
1837   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1838   ierr = PetscFree(svalues);CHKERRQ(ierr);
1839
1840   PetscFunctionReturn(0);
1841 }
1842
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1845 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1846 {
1847   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1848   MPI_Comm       comm = A->comm;
1849   static int     called = 0;
1850   PetscErrorCode ierr;
1851
1852   PetscFunctionBegin;
1853   if (!a->rank) {
1854     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1855   }
1856   if (called) {PetscFunctionReturn(0);} else called = 1;
1857   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the
defaults):\n");CHKERRQ(ierr); 1858 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1864 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1865 { 1866 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1867 PetscErrorCode ierr; 1868 1869 PetscFunctionBegin; 1870 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatEqual_MPIBAIJ" 1878 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1879 { 1880 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1881 Mat a,b,c,d; 1882 PetscTruth flg; 1883 PetscErrorCode ierr; 1884 1885 PetscFunctionBegin; 1886 a = matA->A; b = matA->B; 1887 c = matB->A; d = matB->B; 1888 1889 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1890 if (flg == PETSC_TRUE) { 1891 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1892 } 1893 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1900 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1901 { 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 /* -------------------------------------------------------------------*/ 1910 static struct _MatOps MatOps_Values = { 1911 MatSetValues_MPIBAIJ, 1912 MatGetRow_MPIBAIJ, 1913 MatRestoreRow_MPIBAIJ, 1914 MatMult_MPIBAIJ, 1915 /* 4*/ MatMultAdd_MPIBAIJ, 1916 MatMultTranspose_MPIBAIJ, 1917 MatMultTransposeAdd_MPIBAIJ, 1918 0, 1919 0, 1920 0, 1921 /*10*/ 0, 1922 0, 1923 0, 1924 0, 1925 MatTranspose_MPIBAIJ, 1926 /*15*/ MatGetInfo_MPIBAIJ, 1927 MatEqual_MPIBAIJ, 1928 MatGetDiagonal_MPIBAIJ, 1929 MatDiagonalScale_MPIBAIJ, 1930 MatNorm_MPIBAIJ, 1931 /*20*/ MatAssemblyBegin_MPIBAIJ, 1932 MatAssemblyEnd_MPIBAIJ, 1933 0, 1934 MatSetOption_MPIBAIJ, 1935 MatZeroEntries_MPIBAIJ, 1936 /*25*/ MatZeroRows_MPIBAIJ, 1937 0, 1938 0, 1939 0, 1940 0, 1941 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1942 0, 1943 0, 1944 0, 1945 0, 1946 /*35*/ MatDuplicate_MPIBAIJ, 1947 0, 1948 0, 1949 0, 1950 0, 1951 /*40*/ 0, 1952 MatGetSubMatrices_MPIBAIJ, 1953 MatIncreaseOverlap_MPIBAIJ, 1954 MatGetValues_MPIBAIJ, 1955 0, 1956 /*45*/ MatPrintHelp_MPIBAIJ, 1957 MatScale_MPIBAIJ, 1958 0, 1959 0, 1960 0, 1961 /*50*/ MatGetBlockSize_MPIBAIJ, 1962 0, 1963 0, 1964 0, 1965 0, 1966 /*55*/ 0, 1967 0, 1968 MatSetUnfactored_MPIBAIJ, 1969 0, 1970 MatSetValuesBlocked_MPIBAIJ, 1971 /*60*/ 0, 1972 MatDestroy_MPIBAIJ, 1973 MatView_MPIBAIJ, 1974 MatGetPetscMaps_Petsc, 1975 0, 1976 /*65*/ 0, 1977 0, 1978 0, 1979 0, 1980 0, 1981 /*70*/ MatGetRowMax_MPIBAIJ, 1982 0, 1983 0, 1984 0, 1985 0, 1986 /*75*/ 0, 1987 0, 1988 0, 1989 0, 1990 0, 1991 /*80*/ 0, 1992 0, 1993 0, 1994 0, 1995 MatLoad_MPIBAIJ, 1996 /*85*/ 0, 1997 0, 1998 0, 1999 0, 2000 0, 2001 /*90*/ 0, 2002 0, 2003 0, 2004 0, 2005 0, 2006 /*95*/ 0, 2007 0, 2008 0, 2009 0}; 2010 2011 2012 EXTERN_C_BEGIN 2013 #undef __FUNCT__ 2014 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2015 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2016 { 2017 PetscFunctionBegin; 2018 *a = ((Mat_MPIBAIJ *)A->data)->A; 2019 *iscopy 
= PETSC_FALSE;
2020   PetscFunctionReturn(0);
2021 }
2022 EXTERN_C_END
2023
2024 EXTERN_C_BEGIN
2025 EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
2026 EXTERN_C_END
2027
2028 EXTERN_C_BEGIN
2029 #undef __FUNCT__
2030 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2031 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2032 {
2033   Mat_MPIBAIJ    *b;
2034   PetscErrorCode ierr;
2035   int            i;
2036
2037   PetscFunctionBegin;
2038   B->preallocated = PETSC_TRUE;
2039   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2040
2041   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2042   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2043   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2044   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2045   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2046   if (d_nnz) {
2047     for (i=0; i<B->m/bs; i++) {
2048       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2049     }
2050   }
2051   if (o_nnz) {
2052     for (i=0; i<B->m/bs; i++) {
2053       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2054     }
2055   }
2056
2057   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2058   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2059   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2060   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2061
2062   b      = (Mat_MPIBAIJ*)B->data;
2063   b->bs  = bs;
2064   b->bs2 = bs*bs;
2065   b->mbs = B->m/bs;
2066   b->nbs = B->n/bs;
2067   b->Mbs = B->M/bs;
2068   b->Nbs = B->N/bs;
2069
2070   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2071   b->rowners[0] = 0;
2072   for (i=2; i<=b->size; i++) {
2073     b->rowners[i] += b->rowners[i-1];
2074   }
2075   b->rstart = b->rowners[b->rank];
2076   b->rend   = b->rowners[b->rank+1];
2077
2078   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2079   b->cowners[0] = 0;
2080   for (i=2; i<=b->size; i++) {
2081     b->cowners[i] += b->cowners[i-1];
2082   }
2083   b->cstart = b->cowners[b->rank];
2084   b->cend   = b->cowners[b->rank+1];
2085
2086   for (i=0; i<=b->size; i++) {
2087     b->rowners_bs[i] = b->rowners[i]*bs;
2088   }
2089   b->rstart_bs = b->rstart*bs;
2090   b->rend_bs   = b->rend*bs;
2091   b->cstart_bs = b->cstart*bs;
2092   b->cend_bs   = b->cend*bs;
2093
2094   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2095   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2096   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2097   PetscLogObjectParent(B,b->A);
2098   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2099   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2100   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2101   PetscLogObjectParent(B,b->B);
2102
2103   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2104
2105   PetscFunctionReturn(0);
2106 }
2107 EXTERN_C_END
2108
2109 EXTERN_C_BEGIN
2110 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2111 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2112 EXTERN_C_END
2113
2114 /*MC
2115    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix
type to be used for distributed block sparse matrices. 2116 2117 Options Database Keys: 2118 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2119 2120 Level: beginner 2121 2122 .seealso: MatCreateMPIBAIJ 2123 M*/ 2124 2125 EXTERN_C_BEGIN 2126 #undef __FUNCT__ 2127 #define __FUNCT__ "MatCreate_MPIBAIJ" 2128 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2129 { 2130 Mat_MPIBAIJ *b; 2131 PetscErrorCode ierr; 2132 PetscTruth flg; 2133 2134 PetscFunctionBegin; 2135 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2136 B->data = (void*)b; 2137 2138 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2139 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2140 B->mapping = 0; 2141 B->factor = 0; 2142 B->assembled = PETSC_FALSE; 2143 2144 B->insertmode = NOT_SET_VALUES; 2145 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2146 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2147 2148 /* build local table of row and column ownerships */ 2149 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 2150 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2151 b->cowners = b->rowners + b->size + 2; 2152 b->rowners_bs = b->cowners + b->size + 2; 2153 2154 /* build cache for off array entries formed */ 2155 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2156 b->donotstash = PETSC_FALSE; 2157 b->colmap = PETSC_NULL; 2158 b->garray = PETSC_NULL; 2159 b->roworiented = PETSC_TRUE; 2160 2161 #if defined(PETSC_USE_MAT_SINGLE) 2162 /* stuff for MatSetValues_XXX in single precision */ 2163 b->setvalueslen = 0; 2164 b->setvaluescopy = PETSC_NULL; 2165 #endif 2166 2167 /* stuff used in block assembly */ 2168 b->barray = 0; 2169 2170 /* stuff used for matrix vector multiply */ 2171 b->lvec = 0; 2172 b->Mvctx = 0; 2173 2174 /* stuff for MatGetRow() */ 2175 b->rowindices = 0; 2176 b->rowvalues = 0; 2177 b->getrowactive = PETSC_FALSE; 2178 2179 /* hash table stuff */ 2180 b->ht = 0; 2181 b->hd = 0; 2182 b->ht_size = 0; 2183 b->ht_flag = PETSC_FALSE; 2184 b->ht_fact = 0; 2185 b->ht_total_ct = 0; 2186 b->ht_insert_ct = 0; 2187 2188 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2189 if (flg) { 2190 PetscReal fact = 1.39; 2191 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2192 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2193 if (fact <= 1.0) fact = 1.39; 2194 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2195 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2196 } 2197 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2198 "MatStoreValues_MPIBAIJ", 2199 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2200 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2201 "MatRetrieveValues_MPIBAIJ", 2202 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2203 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2204 "MatGetDiagonalBlock_MPIBAIJ", 2205 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2206 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2207 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2208 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2209 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2210 "MatDiagonalScaleLocal_MPIBAIJ", 2211 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2212 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2213                                      "MatSetHashTableFactor_MPIBAIJ",
2214                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2215   PetscFunctionReturn(0);
2216 }
2217 EXTERN_C_END
2218
2219 /*MC
2220    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2221
2222    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2223    and MATMPIBAIJ otherwise.
2224
2225    Options Database Keys:
2226 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2227
2228   Level: beginner
2229
2230 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2231 M*/
2232
2233 EXTERN_C_BEGIN
2234 #undef __FUNCT__
2235 #define __FUNCT__ "MatCreate_BAIJ"
2236 PetscErrorCode MatCreate_BAIJ(Mat A)
2237 {
2238   PetscErrorCode ierr;
2239   int            size;
2240
2241   PetscFunctionBegin;
2242   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2243   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2244   if (size == 1) {
2245     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2246   } else {
2247     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2248   }
2249   PetscFunctionReturn(0);
2250 }
2251 EXTERN_C_END
2252
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2255 /*@C
2256    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2257    (block compressed row). For good matrix assembly performance
2258    the user should preallocate the matrix storage by setting the parameters
2259    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2260    performance can be increased by more than a factor of 50.
2261
2262    Collective on Mat
2263
2264    Input Parameters:
2265 +  A - the matrix
2266 .  bs - size of block
2267 .  d_nz - number of block nonzeros per block row in diagonal portion of local
2268            submatrix (same for all local rows)
2269 .  d_nnz - array containing the number of block nonzeros in the various block rows
2270            in the diagonal portion of the local submatrix (possibly different for each block
2271            row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2272 .  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2273           submatrix (same for all local rows).
2274 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2275            off-diagonal portion of the local submatrix (possibly different for
2276            each block row) or PETSC_NULL.
2277
2278
2279
2280
2281    Options Database Keys:
2282 +  -mat_no_unroll - uses code that does not unroll the loops in the
2283                     block calculations (much slower)
2284 -  -mat_block_size - size of the blocks to use
2285
2286    Notes:
2287    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2288    then it must be used on all processors that share the object for that argument.
2289
2290    Storage Information:
2291    For a square global matrix we define each processor's diagonal portion
2292    to be its local rows and the corresponding columns (a square submatrix);
2293    each processor's off-diagonal portion encompasses the remainder of the
2294    local matrix (a rectangular submatrix).
2295
2296    The user can specify preallocated storage for the diagonal part of
2297    the local submatrix with either d_nz or d_nnz (not both). Set
2298    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2299    memory allocation. Likewise, specify preallocated storage for the
2300    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2301
2302    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2303    the figure below we depict these three local rows and all columns (0-11).
2304
2305 .vb
2306            0 1 2 3 4 5 6 7 8 9 10 11
2307           -------------------
2308    row 3  | o o o d d d o o o o o o
2309    row 4  | o o o d d d o o o o o o
2310    row 5  | o o o d d d o o o o o o
2311           -------------------
2312 .ve
2313
2314    Thus, any entries in the d locations are stored in the d (diagonal)
2315    submatrix, and any entries in the o locations are stored in the
2316    o (off-diagonal) submatrix. Note that the d and the o submatrices are
2317    stored simply in the MATSEQBAIJ format for compressed row storage.
2318
2319    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2320    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2321    In general, for PDE problems in which most nonzeros are near the diagonal,
2322    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2323    or you will get TERRIBLE performance; see the users' manual chapter on
2324    matrices.
2325
2326    Level: intermediate
2327
2328 .keywords: matrix, block, aij, compressed row, sparse, parallel
2329
2330 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2331 @*/
2332 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2333 {
2334   PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2335
2336   PetscFunctionBegin;
2337   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2338   if (f) {
2339     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2340   }
2341   PetscFunctionReturn(0);
2342 }
2343
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatCreateMPIBAIJ"
2346 /*@C
2347    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2348    (block compressed row). For good matrix assembly performance
2349    the user should preallocate the matrix storage by setting the parameters
2350    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2351    performance can be increased by more than a factor of 50.
2352
2353    Collective on MPI_Comm
2354
2355    Input Parameters:
2356 +  comm - MPI communicator
2357 .  bs - size of block
2358 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2359        This value should be the same as the local size used in creating the
2360        y vector for the matrix-vector product y = Ax.
2361 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2362        This value should be the same as the local size used in creating the
2363        x vector for the matrix-vector product y = Ax.
2364 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2365 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2366 .  d_nz - number of nonzero blocks per block row in diagonal portion of local
2367           submatrix (same for all local rows)
2368 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2369            in the diagonal portion of the local submatrix (possibly different for each block
2370            row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2371 .  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
2372           submatrix (same for all local rows).
2373 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2374            off-diagonal portion of the local submatrix (possibly different for
2375            each block row) or PETSC_NULL.
2376
2377    Output Parameter:
2378 .  A - the matrix
2379
2380    Options Database Keys:
2381 +  -mat_no_unroll - uses code that does not unroll the loops in the
2382                     block calculations (much slower)
2383 -  -mat_block_size - size of the blocks to use
2384
2385    Notes:
2386    A nonzero block is any block that has 1 or more nonzeros in it
2387
2388    The user MUST specify either the local or global matrix dimensions
2389    (possibly both).
2390
2391    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2392    then it must be used on all processors that share the object for that argument.
2393
2394    Storage Information:
2395    For a square global matrix we define each processor's diagonal portion
2396    to be its local rows and the corresponding columns (a square submatrix);
2397    each processor's off-diagonal portion encompasses the remainder of the
2398    local matrix (a rectangular submatrix).
2399
2400    The user can specify preallocated storage for the diagonal part of
2401    the local submatrix with either d_nz or d_nnz (not both). Set
2402    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2403    memory allocation. Likewise, specify preallocated storage for the
2404    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2405
2406    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2407    the figure below we depict these three local rows and all columns (0-11).
2408
2409 .vb
2410            0 1 2 3 4 5 6 7 8 9 10 11
2411           -------------------
2412    row 3  | o o o d d d o o o o o o
2413    row 4  | o o o d d d o o o o o o
2414    row 5  | o o o d d d o o o o o o
2415           -------------------
2416 .ve
2417
2418    Thus, any entries in the d locations are stored in the d (diagonal)
2419    submatrix, and any entries in the o locations are stored in the
2420    o (off-diagonal) submatrix. Note that the d and the o submatrices are
2421    stored simply in the MATSEQBAIJ format for compressed row storage.
2422
2423    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2424    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2425    In general, for PDE problems in which most nonzeros are near the diagonal,
2426    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2427    or you will get TERRIBLE performance; see the users' manual chapter on
2428    matrices.
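   For instance (an illustrative sketch only: the block size and the
   preallocation counts below are hypothetical and depend entirely on the
   application's nonzero pattern), a matrix with 2x2 blocks, roughly three
   nonzero blocks per block row in the diagonal portion and one in the
   off-diagonal portion, could be created with

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,
                           3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
   /* ... insert values with MatSetValuesBlocked(), then assemble with
      MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve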
2429 2430 Level: intermediate 2431 2432 .keywords: matrix, block, aij, compressed row, sparse, parallel 2433 2434 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2435 @*/ 2436 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2437 { 2438 PetscErrorCode ierr; 2439 int size; 2440 2441 PetscFunctionBegin; 2442 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2443 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2444 if (size > 1) { 2445 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2446 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2447 } else { 2448 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2449 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2450 } 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2456 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2457 { 2458 Mat mat; 2459 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2460 PetscErrorCode ierr; 2461 int len=0; 2462 2463 PetscFunctionBegin; 2464 *newmat = 0; 2465 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2466 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2467 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2468 2469 mat->factor = matin->factor; 2470 mat->preallocated = PETSC_TRUE; 2471 mat->assembled = PETSC_TRUE; 2472 mat->insertmode = NOT_SET_VALUES; 2473 2474 a = (Mat_MPIBAIJ*)mat->data; 2475 a->bs = oldmat->bs; 2476 a->bs2 = oldmat->bs2; 2477 a->mbs = oldmat->mbs; 2478 a->nbs = oldmat->nbs; 2479 a->Mbs = oldmat->Mbs; 2480 a->Nbs = oldmat->Nbs; 2481 2482 a->rstart = oldmat->rstart; 2483 a->rend = oldmat->rend; 2484 a->cstart = oldmat->cstart; 2485 a->cend = oldmat->cend; 2486 a->size = oldmat->size; 2487 a->rank = oldmat->rank; 2488 a->donotstash = oldmat->donotstash; 2489 a->roworiented = oldmat->roworiented; 2490 a->rowindices = 0; 2491 a->rowvalues = 0; 2492 a->getrowactive = PETSC_FALSE; 2493 a->barray = 0; 2494 a->rstart_bs = oldmat->rstart_bs; 2495 a->rend_bs = oldmat->rend_bs; 2496 a->cstart_bs = oldmat->cstart_bs; 2497 a->cend_bs = oldmat->cend_bs; 2498 2499 /* hash table stuff */ 2500 a->ht = 0; 2501 a->hd = 0; 2502 a->ht_size = 0; 2503 a->ht_flag = oldmat->ht_flag; 2504 a->ht_fact = oldmat->ht_fact; 2505 a->ht_total_ct = 0; 2506 a->ht_insert_ct = 0; 2507 2508 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2509 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2510 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2511 if (oldmat->colmap) { 2512 #if defined (PETSC_USE_CTABLE) 2513 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2514 #else 2515 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2516 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2517 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2518 #endif 2519 } else a->colmap = 0; 2520 2521 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2522 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2523 PetscLogObjectMemory(mat,len*sizeof(int)); 2524 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2525 } else a->garray = 0; 2526 2527 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 
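  /* lvec is the sequential work vector that receives the off-process entries of x
     needed during y = A*x, and Mvctx is the scatter context that fills it (see
     MatDiagonalScale_MPIBAIJ above); both are duplicated here so the new matrix
     owns its own communication objects rather than sharing them with the original */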
2528 PetscLogObjectParent(mat,a->lvec); 2529 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2530 PetscLogObjectParent(mat,a->Mvctx); 2531 2532 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2533 PetscLogObjectParent(mat,a->A); 2534 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2535 PetscLogObjectParent(mat,a->B); 2536 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2537 *newmat = mat; 2538 2539 PetscFunctionReturn(0); 2540 } 2541 2542 #include "petscsys.h" 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatLoad_MPIBAIJ" 2546 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2547 { 2548 Mat A; 2549 PetscErrorCode ierr; 2550 int i,nz,j,rstart,rend,fd; 2551 PetscScalar *vals,*buf; 2552 MPI_Comm comm = ((PetscObject)viewer)->comm; 2553 MPI_Status status; 2554 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2555 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2556 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2557 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2558 int dcount,kmax,k,nzcount,tmp; 2559 2560 PetscFunctionBegin; 2561 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2562 2563 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2564 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2565 if (!rank) { 2566 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2567 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2568 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2569 if (header[3] < 0) { 2570 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2571 } 2572 } 2573 2574 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2575 M = header[1]; N = header[2]; 2576 2577 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2578 2579 /* 2580 This code adds extra rows to make sure the number of rows is 2581 divisible by the blocksize 2582 */ 2583 Mbs = M/bs; 2584 extra_rows = bs - M + bs*(Mbs); 2585 if (extra_rows == bs) extra_rows = 0; 2586 else Mbs++; 2587 if (extra_rows &&!rank) { 2588 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2589 } 2590 2591 /* determine ownership of all rows */ 2592 mbs = Mbs/size + ((Mbs % size) > rank); 2593 m = mbs*bs; 2594 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2595 browners = rowners + size + 1; 2596 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2597 rowners[0] = 0; 2598 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2599 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2600 rstart = rowners[rank]; 2601 rend = rowners[rank+1]; 2602 2603 /* distribute row lengths to all processors */ 2604 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2605 if (!rank) { 2606 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2607 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2608 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2609 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2610 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2611 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2612 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2613 } else { 2614 ierr = 
MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2615   }
2616
2617   if (!rank) {
2618     /* calculate the number of nonzeros on each processor */
2619     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2620     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2621     for (i=0; i<size; i++) {
2622       for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
2623         procsnz[i] += rowlengths[j];
2624       }
2625     }
2626     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2627
2628     /* determine max buffer needed and allocate it */
2629     maxnz = 0;
2630     for (i=0; i<size; i++) {
2631       maxnz = PetscMax(maxnz,procsnz[i]);
2632     }
2633     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2634
2635     /* read in my part of the matrix column indices */
2636     nz   = procsnz[0];
2637     ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2638     mycols = ibuf;
2639     if (size == 1) nz -= extra_rows;
2640     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2641     if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }
2642
2643     /* read in the column indices for all other processors (except the last) and ship them off */
2644     for (i=1; i<size-1; i++) {
2645       nz   = procsnz[i];
2646       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2647       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2648     }
2649     /* read in the stuff for the last proc */
2650     if (size != 1) {
2651       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2652       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2653       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2654       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2655     }
2656     ierr = PetscFree(cols);CHKERRQ(ierr);
2657   } else {
2658     /* determine buffer space needed for message */
2659     nz = 0;
2660     for (i=0; i<m; i++) {
2661       nz += locrowlens[i];
2662     }
2663     ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2664     mycols = ibuf;
2665     /* receive message of column indices */
2666     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2667     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2668     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2669   }
2670
2671   /* loop over local rows, determining number of off diagonal entries */
2672   ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2673   odlens = dlens + (rend-rstart);
2674   ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2675   ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2676   masked1 = mask    + Mbs;
2677   masked2 = masked1 + Mbs;
2678   rowcount = 0; nzcount = 0;
2679   for (i=0; i<mbs; i++) {
2680     dcount  = 0;
2681     odcount = 0;
2682     for (j=0; j<bs; j++) {
2683       kmax = locrowlens[rowcount];
2684       for (k=0; k<kmax; k++) {
2685         tmp = mycols[nzcount++]/bs;
2686         if (!mask[tmp]) {
2687           mask[tmp] = 1;
2688           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2689           else masked1[dcount++] = tmp;
2690         }
2691       }
2692       rowcount++;
2693     }
2694
2695     dlens[i]  = dcount;
2696     odlens[i] = odcount;
2697
2698     /* zero out the mask elements we set */
2699     for (j=0; j<dcount; j++)  mask[masked1[j]] = 0;
2700     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2701   }
2702
2703   /* create our matrix */
2704   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2705   ierr = MatSetType(A,type);CHKERRQ(ierr);
2706   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2707
2708   /* Why isn't this called as ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); ? */
2709   MatSetOption(A,MAT_COLUMNS_SORTED);
2710
2711   if (!rank) {
2712     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2713     /* read in my part of the matrix numerical values */
2714     nz     = procsnz[0];
2715     vals   = buf;
2716     mycols = ibuf;
2717     if (size == 1) nz -= extra_rows;
2718     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2719     if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }
2720
2721     /* insert into matrix */
2722     jj = rstart*bs;
2723     for (i=0; i<m; i++) {
2724       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2725       mycols += locrowlens[i];
2726       vals   += locrowlens[i];
2727       jj++;
2728     }
2729     /* read in the values for the other processors (except the last one) and ship them out */
2730     for (i=1; i<size-1; i++) {
2731       nz   = procsnz[i];
2732       vals = buf;
2733       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2734       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2735     }
2736     /* the last proc */
2737     if (size != 1) {
2738       nz   = procsnz[size-1] - extra_rows;
2739       vals = buf;
2740       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2741       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2742       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2743     }
2744     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2745   } else {
2746     /* receive numeric values */
2747     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2748
2749     /* receive message of values */
2750     vals   = buf;
2751     mycols = ibuf;
2752     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2753     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2754     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2755
2756     /* insert into matrix */
2757     jj = rstart*bs;
2758     for (i=0; i<m; i++) {
2759       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2760       mycols += locrowlens[i];
2761       vals   += locrowlens[i];
2762       jj++;
2763     }
2764   }
2765   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2766   ierr = PetscFree(buf);CHKERRQ(ierr);
2767   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2768   ierr = PetscFree(rowners);CHKERRQ(ierr);
2769   ierr = PetscFree(dlens);CHKERRQ(ierr);
2770   ierr = PetscFree(mask);CHKERRQ(ierr);
2771   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2772   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2773
2774   *newmat = A;
2775   PetscFunctionReturn(0);
2776 }
2777
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2780 /*@
2781    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
2782
2783    Input Parameters:
2784 +  mat - the matrix
2785 -
fact - factor 2786 2787 Collective on Mat 2788 2789 Level: advanced 2790 2791 Notes: 2792 This can also be set by the command line option: -mat_use_hash_table fact 2793 2794 .keywords: matrix, hashtable, factor, HT 2795 2796 .seealso: MatSetOption() 2797 @*/ 2798 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2799 { 2800 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2801 2802 PetscFunctionBegin; 2803 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2804 if (f) { 2805 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2806 } 2807 PetscFunctionReturn(0); 2808 } 2809 2810 #undef __FUNCT__ 2811 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2812 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2813 { 2814 Mat_MPIBAIJ *baij; 2815 2816 PetscFunctionBegin; 2817 baij = (Mat_MPIBAIJ*)mat->data; 2818 baij->ht_fact = fact; 2819 PetscFunctionReturn(0); 2820 } 2821 2822 #undef __FUNCT__ 2823 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2824 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2825 { 2826 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2827 PetscFunctionBegin; 2828 *Ad = a->A; 2829 *Ao = a->B; 2830 *colmap = a->garray; 2831 PetscFunctionReturn(0); 2832 } 2833
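
/*
   Usage sketch for MatMPIBAIJGetSeqBAIJ() (illustrative only: A stands for any
   assembled MATMPIBAIJ matrix and j for a hypothetical local block column).
   Ad is the "diagonal" sequential block, Ao the "off-diagonal" sequential block,
   and colmap is the garray of the matrix: Ao stores compacted block-column
   indices, and colmap[j] recovers the global block column of local block
   column j of Ao, just as MatGetRow_MPIBAIJ() above uses cmap[cworkB[i]/bs]:

      Mat Ad,Ao;
      int *colmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
      gbcol = colmap[j];    (global block column of Ao's local block column j)

   Columns of Ad are numbered locally; adding the owning process's cstart block
   offset gives their global block columns.
*/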