1 /*$Id: mpibaij.c,v 1.191 2000/04/12 01:47:05 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 extern int MatSetUpMultiply_MPIBAIJ(Mat); 7 extern int DisAssemble_MPIBAIJ(Mat); 8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15 extern int MatPrintHelp_SeqBAIJ(Mat); 16 extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*); 17 18 /* UGLY, ugly, ugly 19 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22 converts the entries into single precision and then calls ..._MatScalar() to put them 23 into the single precision data structures. 
*/
#if defined(PETSC_USE_MAT_SINGLE)
extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
/* When MatScalar == Scalar the ..._MatScalar() variants are simply the ordinary routines */
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatStashValuesRow_Private_MatScalar        MatStashValuesRow_Private
#define MatStashValuesCol_Private_MatScalar        MatStashValuesCol_Private
#define MatStashValuesRowBlocked_Private_MatScalar MatStashValuesRowBlocked_Private
#define MatStashValuesColBlocked_Private_MatScalar MatStashValuesColBlocked_Private
#endif

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ"
/*
   Saves a copy of the current matrix values by delegating to the
   sequential diagonal (A) and off-diagonal (B) blocks.
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ"
/*
   Restores the matrix values previously saved with MatStoreValues_MPIBAIJ(),
   again delegating to the two sequential blocks.
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non scalable way since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* table maps global block column+1 -> local point column+1 (offset by one so
     that 0 can mean "not present") */
  ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length Nbs (global block columns); entry 0 means "not present" */
  baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE  10

/*
   Inserts (or adds) a single value into the diagonal (A) block at point
   row/col; expects the caller (MatSetValues_MPIBAIJ_MatScalar) to have the
   working variables (rp,ap,brow,bcol,...) declared.  Grows the row storage
   by CHUNKSIZE blocks when the row is full.
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows the range, finished by linear scan below */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value; \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
133 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 134 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 135 new_j = (int*)(new_a + bs2*new_nz); \ 136 new_i = new_j + new_nz; \ 137 \ 138 /* copy over old data into new slots */ \ 139 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 140 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 141 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 142 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 143 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 144 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 145 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 146 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 147 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 148 /* free up old matrix storage */ \ 149 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 150 if (!a->singlemalloc) { \ 151 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 152 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 153 } \ 154 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 155 a->singlemalloc = PETSC_TRUE; \ 156 \ 157 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 158 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 159 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 160 a->maxnz += bs2*CHUNKSIZE; \ 161 a->reallocs++; \ 162 a->nz++; \ 163 } \ 164 N = nrow++ - 1; \ 165 /* shift up all the later entries in this row */ \ 166 for (ii=N; ii>=_i; ii--) { \ 167 rp[ii+1] = rp[ii]; \ 168 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 169 } \ 170 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 171 rp[_i] = bcol; \ 172 ap[bs2*_i + bs*cidx + ridx] = value; \ 173 a_noinsert:; \ 174 ailen[brow] = nrow; \ 175 } 176 177 #define 
MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    /* identical in structure to MatSetValues_SeqBAIJ_A_Private() but acts on \
       the off-diagonal (B) block */ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value; \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                           ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
} 

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
/*
   Converts the Scalar (double) values to MatScalar (single) IN PLACE inside
   the caller's array (see the "UGLY" note at the top of this file -- the
   conversion destroys the caller's copy of v) and then forwards to the
   single precision routine.  Writing vsingle[i] never clobbers an unread
   v[i] because the write offsets trail the read offsets.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  int        ierr,i,N = m*n;
  MatScalar  *vsingle = (MatScalar*)v;

  PetscFunctionBegin;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
/*
   Blocked analogue of MatSetValues_MPIBAIJ() above: same in-place
   double -> single conversion, over m*n blocks of bs2 entries each.
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  int        ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2;
  MatScalar  *vsingle = (MatScalar*)v;

  PetscFunctionBegin;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr =
MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
/*
   Inserts/adds values given by global point (not block) indices.  Locally
   owned entries go directly into the A (diagonal) or B (off-diagonal)
   sequential block via the ..._Private() macros; off-processor entries are
   stashed for communication at assembly time.
*/
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative rows are ignored by convention */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column lies in the off-diagonal block; translate the global
             column to B's local numbering through colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: fall back to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row is owned by another processor: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private_MatScalar(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private_MatScalar(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
/*
   Inserts/adds bs x bs blocks of values given by BLOCK indices.  Each block
   is copied (if necessary) into the contiguous scratch buffer baij->barray
   before being passed to the sequential blocked routine.
*/
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    /* lazily allocate the one-block scratch buffer (freed with the matrix) */
    baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if
(roworiented) {
        ierr = MatStashValuesRowBlocked_Private_MatScalar(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesColBlocked_Private_MatScalar(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* golden-ratio multiplicative hashing of the (block row,block col) key */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT"
/*
   Fast-path MatSetValues used once the hash table (see
   MatCreateHashTable_MPIBAIJ_Private) exists: every (row,col) must already
   have an entry in the table -- no new nonzeros can be created.
   Collisions are resolved by linear probing (wrapping around at size).
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* debug build additionally counts probes for the assembly statistics */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Blocked analogue of MatSetValues_MPIBAIJ_HT(): im[]/in[] are BLOCK
   indices and each value group is a bs x bs block.  The stored blocks are
   column-major, so the row-oriented input loops stride by bs (jj+=bs).
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  Scalar      *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr =
MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
/*
   Retrieves entries by global point indices.  Only locally owned rows are
   supported; entries not present in the off-diagonal block come back as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate through colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column not present locally -> value is (structurally) zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
/*
   Computes a matrix norm.  Currently only NORM_FROBENIUS is supported in
   parallel (uniprocessor delegates everything to the sequential routine);
   the local squared sums over A and B are combined with an Allreduce.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* NOTE(review): assumes PetscReal == double (MPI_DOUBLE) -- confirm for
         single precision real builds */
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table 
  This table is created only once. 
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
/*
   Builds the open-addressing hash table used by the ..._HT SetValues
   routines: one slot per local nonzero block, keyed by
   (block row)*Nbs + (block col) + 1, storing the address of the block's
   values.  'factor' oversizes the table relative to the nonzero count.
*/
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         A = baij->A,B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  MatScalar   **HD;
  PetscReal   tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    /* table already exists; it is only ever built once */
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table: single malloc holds both the pointer
     array (hd) and the key array (ht) */
  baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;


  /* NOTE(review): zeroed with sizeof(Scalar*) but allocated with
     sizeof(MatScalar*) -- identical pointer sizes, but should match */
  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);


  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      /* linear probing: first empty slot (key 0) receives the entry */
      for (k=0; k<size; k++){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
830 } 831 } 832 #if defined(PETSC_USE_BOPT_g) 833 if (k> max) max = k; 834 #endif 835 } 836 } 837 838 /* Print Summary */ 839 #if defined(PETSC_USE_BOPT_g) 840 for (i=0,j=0; i<size; i++) { 841 if (HT[i]) {j++;} 842 } 843 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 844 #endif 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNC__ 849 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 850 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 851 { 852 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853 int ierr,nstash,reallocs; 854 InsertMode addv; 855 856 PetscFunctionBegin; 857 if (baij->donotstash) { 858 PetscFunctionReturn(0); 859 } 860 861 /* make sure all processors are either in INSERTMODE or ADDMODE */ 862 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 863 if (addv == (ADD_VALUES|INSERT_VALUES)) { 864 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 865 } 866 mat->insertmode = addv; /* in case this processor had no cache */ 867 868 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 869 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 870 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 871 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 872 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 873 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 874 PetscFunctionReturn(0); 875 } 876 877 #undef __FUNC__ 878 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 879 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 880 { 881 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 882 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 883 int 
i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 884 int *row,*col,other_disassembled; 885 PetscTruth r1,r2,r3; 886 MatScalar *val; 887 InsertMode addv = mat->insertmode; 888 889 PetscFunctionBegin; 890 if (!baij->donotstash) { 891 while (1) { 892 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 893 if (!flg) break; 894 895 for (i=0; i<n;) { 896 /* Now identify the consecutive vals belonging to the same row */ 897 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 898 if (j < n) ncols = j-i; 899 else ncols = n-i; 900 /* Now assemble all these values with a single function call */ 901 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 902 i = j; 903 } 904 } 905 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 906 /* Now process the block-stash. Since the values are stashed column-oriented, 907 set the roworiented flag to column oriented, and after MatSetValues() 908 restore the original flags */ 909 r1 = baij->roworiented; 910 r2 = a->roworiented; 911 r3 = b->roworiented; 912 baij->roworiented = PETSC_FALSE; 913 a->roworiented = PETSC_FALSE; 914 b->roworiented = PETSC_FALSE; 915 while (1) { 916 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 917 if (!flg) break; 918 919 for (i=0; i<n;) { 920 /* Now identify the consecutive vals belonging to the same row */ 921 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 922 if (j < n) ncols = j-i; 923 else ncols = n-i; 924 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 925 i = j; 926 } 927 } 928 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 929 baij->roworiented = r1; 930 a->roworiented = r2; 931 b->roworiented = r3; 932 } 933 934 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 935 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 936 937 /* determine if any processor has disassembled, if so we 
      must also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD over was_assembled: result is 0 iff ANY rank was not assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* debug-build only: report hash-table search statistics gathered during MatSetValues */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on first final assembly with -mat_use_hash_table: build the table and switch
     the SetValues kernels to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow is invalidated by reassembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Viewer kernel for MPIBAIJ matrices: handles ASCII (info and full), draw,
   and socket viewers.  Non-info formats gather the whole matrix onto rank 0.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth  isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-rank nonzero/memory statistics; counts are in blocks so scale by bs */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* rank 0 owns all rows of the gathered copy; other ranks own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      /* rvals = global (point) row indices of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block at a time */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part (off-diagonal; garray maps local to global block column) */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
/* Dispatch MatView to the combined ASCII/draw/socket/binary kernel above. */
int MatView_MPIBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
/* Free all storage owned by an MPIBAIJ matrix, including its A/B submatrices. */
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr =
MatDestroy(baij->B);CHKERRQ(ierr); 1131 #if defined (PETSC_USE_CTABLE) 1132 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1133 #else 1134 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1135 #endif 1136 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1137 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1138 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1139 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1140 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1141 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1142 ierr = PetscFree(baij);CHKERRQ(ierr); 1143 PLogObjectDestroy(mat); 1144 PetscHeaderDestroy(mat); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNC__ 1149 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1150 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1151 { 1152 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1153 int ierr,nt; 1154 1155 PetscFunctionBegin; 1156 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1157 if (nt != a->n) { 1158 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1159 } 1160 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1161 if (nt != a->m) { 1162 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1163 } 1164 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1165 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1166 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1167 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1168 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNC__ 1173 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1174 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1175 { 1176 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1177 int 
ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
/* yy = A^T*xx: local B^T contribution is scattered back (reverse, add) into yy. */
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
/* zz = yy + A^T*xx, same reverse-scatter structure as MatMultTranspose. */
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
/* Scale both the diagonal (A) and off-diagonal (B) local blocks by *aa. */
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a
name=""></a>*/"MatGetSize_MPIBAIJ"
/* Return the global point (not block) dimensions; either output may be NULL. */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
/* Return this process's local point dimensions. */
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
/* Ownership range in point rows: block range scaled by the block size. */
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart*mat->bs;
  if (n) *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
/*
   Return one locally owned (point) row, merging the A and B parts into a
   single list sorted by global column.  Only one row may be active at a time;
   MatRestoreRow_MPIBAIJ must be called before the next MatGetRow.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  Scalar      *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the values and (behind them) the indices */
    mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v) {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be computed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v) *v = 0;
    }
  }
  *nz = nztot;
  ierr =
(*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
/* Release the row handed out by MatGetRow_MPIBAIJ (data lives in reused scratch). */
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
/* Zero every stored entry of both local blocks; nonzero structure is kept. */
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
/*
   Gather nonzero/memory statistics.  Local numbers are the sum over the A and
   B blocks; global numbers are reduced (max or sum) over the communicator.
*/
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (double)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory; isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
isend[3] += info->memory; isend[4] += info->mallocs; 1430 if (flag == MAT_LOCAL) { 1431 info->nz_used = isend[0]; 1432 info->nz_allocated = isend[1]; 1433 info->nz_unneeded = isend[2]; 1434 info->memory = isend[3]; 1435 info->mallocs = isend[4]; 1436 } else if (flag == MAT_GLOBAL_MAX) { 1437 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1438 info->nz_used = irecv[0]; 1439 info->nz_allocated = irecv[1]; 1440 info->nz_unneeded = irecv[2]; 1441 info->memory = irecv[3]; 1442 info->mallocs = irecv[4]; 1443 } else if (flag == MAT_GLOBAL_SUM) { 1444 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1445 info->nz_used = irecv[0]; 1446 info->nz_allocated = irecv[1]; 1447 info->nz_unneeded = irecv[2]; 1448 info->memory = irecv[3]; 1449 info->mallocs = irecv[4]; 1450 } else { 1451 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1452 } 1453 info->rows_global = (double)a->M; 1454 info->columns_global = (double)a->N; 1455 info->rows_local = (double)a->m; 1456 info->columns_local = (double)a->N; 1457 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1458 info->fill_ratio_needed = 0; 1459 info->factor_mallocs = 0; 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNC__ 1464 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1465 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1466 { 1467 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1468 int ierr; 1469 1470 PetscFunctionBegin; 1471 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1472 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1473 op == MAT_COLUMNS_UNSORTED || 1474 op == MAT_COLUMNS_SORTED || 1475 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1476 op == MAT_KEEP_ZEROED_ROWS || 1477 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1478 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1479 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1480 } else if (op == MAT_ROW_ORIENTED) { 1481 a->roworiented = PETSC_TRUE; 1482 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1483 ierr = 
MatSetOption(a->B,op);CHKERRQ(ierr); 1484 } else if (op == MAT_ROWS_SORTED || 1485 op == MAT_ROWS_UNSORTED || 1486 op == MAT_SYMMETRIC || 1487 op == MAT_STRUCTURALLY_SYMMETRIC || 1488 op == MAT_YES_NEW_DIAGONALS || 1489 op == MAT_USE_HASH_TABLE) { 1490 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1491 } else if (op == MAT_COLUMN_ORIENTED) { 1492 a->roworiented = PETSC_FALSE; 1493 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1494 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1495 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1496 a->donotstash = PETSC_TRUE; 1497 } else if (op == MAT_NO_NEW_DIAGONALS) { 1498 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1499 } else if (op == MAT_USE_HASH_TABLE) { 1500 a->ht_flag = PETSC_TRUE; 1501 } else { 1502 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1503 } 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNC__ 1508 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1509 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1510 { 1511 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1512 Mat_SeqBAIJ *Aloc; 1513 Mat B; 1514 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1515 int bs=baij->bs,mbs=baij->mbs; 1516 MatScalar *a; 1517 1518 PetscFunctionBegin; 1519 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1520 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1521 1522 /* copy over the A part */ 1523 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1524 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1525 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1526 1527 for (i=0; i<mbs; i++) { 1528 rvals[0] = bs*(baij->rstart + i); 1529 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1530 for (j=ai[i]; j<ai[i+1]; j++) { 1531 col = (baij->cstart+aj[j])*bs; 1532 for (k=0; k<bs; k++) { 1533 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1534 col++; a += bs; 1535 } 1536 } 1537 } 1538 /* copy 
over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray maps local off-diagonal block columns to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(baij->A);CHKERRQ(ierr);
    ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
    if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
    if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
    if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
    if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(baij);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B into the A header.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
/*
   mat = diag(ll) * mat * diag(rr).  The right scaling of the off-diagonal
   block needs the ghost values of rr, so that scatter is overlapped with
   the left scaling and the diagonal-block scaling.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a = baij->A,b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
/*
   Zero the (point) rows listed in is, optionally placing *diag on the
   diagonal.  Row indices may refer to rows owned by other processes, so
   the indices are first routed to their owners with MPI point-to-point
   messages before the local rows are zeroed.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int
*lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = 0;
    /* rowners holds block-row ownership; scale by bs to compare point rows */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax = work[rank];          /* longest single message this rank will receive */
  nrecvs = work[size+rank];   /* number of messages this rank will receive */
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* starts[] was consumed above as insertion cursors; rebuild it */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;   /* first point row owned by this rank */

  /* wait on receives */
  lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex] = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows.
If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: fall back to per-row MatSetValues */
    ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row = lrows[i] + rstart_bs;
      ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
/* Print the MPIBAIJ-specific runtime options (once per run, rank 0 only). */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  MPI_Comm    comm = A->comm;
  static int
called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  /* only print the parallel options the first time through */
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
/*
   Collective equality test: both local blocks must match on every process;
   the per-process results are combined with a logical-AND reduction.
*/
int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat         a,b,c,d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table installed into every MPIBAIJ matrix; 0 = not supported. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTranspose_MPIBAIJ,
  MatMultTransposeAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
/* Returns the local diagonal block of the parallel matrix without copying it */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;  /* caller receives a reference, not a copy */
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           in the diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor 
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
  PetscTruth  flag1,flag2,flg;

  PetscFunctionBegin;
  /* the command-line option overrides the bs argument */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  /* validate preallocation arguments (PETSC_DEFAULT == -2 and PETSC_DECIDE == -1 are legal) */
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    /* uniprocessor default: fall back to the sequential BAIJ format */
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    =
PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPIBAIJ;
  B->ops->view    = MatView_MPIBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* consistency checks on the local/global size arguments */
  if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes win and the
     global sizes are recomputed by summation below */
  if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    /* global sizes are the sums of the local sizes over all ranks */
    ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    /* deal the block rows out as evenly as possible; low ranks get the remainder */
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* sizes are cached both on the Mat header and in the implementation struct */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners/cowners/rowners_bs
     share one allocation of 3*(size+2) ints */
  b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  /* rowners[] becomes the prefix sum of block-row counts over the ranks */
  ierr          = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;  /* same table in scalar (unblocked) indices */
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  /* the off-diagonal block is m x N: it spans the full global column width */
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash =
PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* reject nonsensical factors; fall back to the default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register type-specific methods reachable through PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
/* Duplicates an MPIBAIJ matrix, optionally copying the numerical values (cpvalues) */
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;
  PetscTruth  flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct
 _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIBAIJ;
  mat->ops->view    = MatView_MPIBAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the size/layout bookkeeping from the original */
  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart      = oldmat->rstart;
  a->rend        = oldmat->rend;
  a->cstart      = oldmat->cstart;
  a->cend        = oldmat->cend;
  a->size        = oldmat->size;
  a->rank        = oldmat->rank;
  a->donotstash  = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* per-call scratch state starts out empty in the copy */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff: keep the settings but not the (stale) table itself */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;


  /* copy the ownership tables (one shared allocation, as in MatCreateMPIBAIJ) */
  a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners    = a->rowners + a->size + 2;
  a->rowners_bs = a->cowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr =
MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2262 if (oldmat->colmap) { 2263 #if defined (PETSC_USE_CTABLE) 2264 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2265 #else 2266 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2267 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2268 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2269 #endif 2270 } else a->colmap = 0; 2271 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2272 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2273 PLogObjectMemory(mat,len*sizeof(int)); 2274 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2275 } else a->garray = 0; 2276 2277 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2278 PLogObjectParent(mat,a->lvec); 2279 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2280 2281 PLogObjectParent(mat,a->Mvctx); 2282 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2283 PLogObjectParent(mat,a->A); 2284 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2285 PLogObjectParent(mat,a->B); 2286 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2287 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2288 if (flg) { 2289 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2290 } 2291 *newmat = mat; 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #include "sys.h" 2296 2297 #undef __FUNC__ 2298 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 2299 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2300 { 2301 Mat A; 2302 int i,nz,ierr,j,rstart,rend,fd; 2303 Scalar *vals,*buf; 2304 MPI_Comm comm = ((PetscObject)viewer)->comm; 2305 MPI_Status status; 2306 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2307 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2308 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2309 
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 touches the file; header = [cookie, M, N, format flag] */
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /* 
     This code adds extra rows to make sure the number of rows is 
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);  /* = bs - (M mod bs); bs means M was already divisible */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs        = Mbs/size + ((Mbs % size) > rank);
  m          = mbs * bs;
  rowners    = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners   = rowners + size + 1;
  /* rowners[]/browners[]: prefix sums of block rows / scalar rows per rank */
  ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
    ierr       = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* each padding row holds just a unit diagonal entry */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;  /* padding rows are not on disk */
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;  /* padding gets diagonal columns */
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks block columns already seen within the current block row */
  dlens   = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
  odlens  = dlens + (rend-rstart);
  mask    = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
  ierr    = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;  /* scalar column -> block column */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix, preallocated exactly from the counts above */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }  /* unit diagonal padding */

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc (i == size-1 after the loop above) */
    if (size != 1){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr =
PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  /* the hash table is an implementation detail of the MPIBAIJ format only */
  if (mat->type != MATMPIBAIJ) {
    SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;  /* takes effect the next time the table is (re)built */
  PetscFunctionReturn(0);
}