1 /*$Id: mpibaij.c,v 1.189 2000/04/09 04:36:25 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 extern int MatSetUpMultiply_MPIBAIJ(Mat); 7 extern int DisAssemble_MPIBAIJ(Mat); 8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15 extern int MatPrintHelp_SeqBAIJ(Mat); 16 extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*); 17 18 /* UGLY, ugly, ugly 19 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22 converts the entries into single precision and then calls ..._MatScalar() to put them 23 into the single precision data structures. 24 */ 25 #if defined(PETSC_USE_MAT_SINGLE) 26 extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 27 extern int MatSetValuesBlock_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 28 extern int MatSetValuesBlock_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 #else 30 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 31 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 33 #endif 34 35 EXTERN_C_BEGIN 36 #undef __FUNC__ 37 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ" 38 int MatStoreValues_MPIBAIJ(Mat mat) 39 { 40 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 41 int ierr; 42 43 PetscFunctionBegin; 44 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 45 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 46 PetscFunctionReturn(0); 47 } 48 EXTERN_C_END 49 50 EXTERN_C_BEGIN 51 #undef __FUNC__ 52 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ" 53 int MatRetrieveValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 int ierr; 57 58 PetscFunctionBegin; 59 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 60 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 /* 66 Local utility routine that creates a mapping from the global column 67 number to the local number in the off-diagonal part of the local 68 storage of the matrix. This is done in a non scable way since the 69 length of colmap equals the global matrix length. 70 */ 71 #undef __FUNC__ 72 #define __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private" 73 static int CreateColmap_MPIBAIJ_Private(Mat mat) 74 { 75 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 76 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 77 int nbs = B->nbs,i,bs=B->bs,ierr; 78 79 PetscFunctionBegin; 80 #if defined (PETSC_USE_CTABLE) 81 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 82 for (i=0; i<nbs; i++){ 83 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 84 } 85 #else 86 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 87 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 88 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 89 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 90 #endif 91 PetscFunctionReturn(0); 92 } 93 94 #define CHUNKSIZE 10 95 96 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 97 { \ 98 \ 99 brow = row/bs; \ 100 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 101 rmax = aimax[brow]; nrow = ailen[brow]; \ 102 bcol = col/bs; \ 103 ridx = row % bs; cidx = col % bs; \ 104 low = 0; high = nrow; \ 105 while (high-low > 3) { \ 106 t = (low+high)/2; \ 107 if (rp[t] > bcol) high = t; \ 108 else low = t; \ 109 } \ 110 for (_i=low; _i<high; _i++) { \ 111 if (rp[_i] > bcol) break; \ 112 if (rp[_i] == bcol) { \ 113 bap = ap + bs2*_i + bs*cidx + ridx; \ 114 if (addv == ADD_VALUES) *bap += value; \ 115 else *bap = value; \ 116 goto a_noinsert; \ 117 } \ 118 } \ 119 if (a->nonew == 1) goto a_noinsert; \ 120 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 121 if (nrow >= rmax) { \ 122 /* there is no extra room in row, therefore enlarge */ \ 123 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 124 MatScalar *new_a; \ 125 \ 126 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 127 \ 128 /* malloc new storage space */ \ 129 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 130 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 131 new_j = (int*)(new_a + bs2*new_nz); \ 132 new_i = new_j + new_nz; \ 133 \ 134 /* copy over old data into new slots */ \ 135 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 136 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 137 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 138 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 139 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 140 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 142 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 143 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 144 /* free up old matrix storage */ \ 145 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 146 if (!a->singlemalloc) { \ 147 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 148 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 149 } \ 150 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 151 a->singlemalloc = PETSC_TRUE; \ 152 \ 153 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 154 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 155 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 156 a->maxnz += bs2*CHUNKSIZE; \ 157 a->reallocs++; \ 158 a->nz++; \ 159 } \ 160 N = nrow++ - 1; \ 161 /* shift up all the later entries in this row */ \ 162 for (ii=N; ii>=_i; ii--) { \ 163 rp[ii+1] = rp[ii]; \ 164 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 165 } \ 166 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 167 rp[_i] = bcol; \ 168 ap[bs2*_i + bs*cidx + ridx] = value; \ 169 a_noinsert:; \ 170 ailen[brow] = nrow; \ 171 } 172 173 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 174 { \ 175 brow = row/bs; \ 176 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 177 rmax = bimax[brow]; nrow = bilen[brow]; \ 178 bcol = col/bs; \ 179 ridx = row % bs; cidx = col % bs; \ 180 low = 0; high = nrow; \ 181 while (high-low > 3) { \ 182 t = (low+high)/2; \ 183 if (rp[t] > bcol) high = t; \ 184 else low = t; \ 185 } \ 186 for (_i=low; _i<high; _i++) { \ 187 if (rp[_i] > bcol) break; \ 188 if (rp[_i] == bcol) { \ 189 bap = ap + bs2*_i + bs*cidx + ridx; \ 190 if (addv == ADD_VALUES) *bap += value; \ 191 else *bap = value; \ 192 goto b_noinsert; \ 193 } \ 194 } \ 195 if (b->nonew == 1) goto b_noinsert; \ 196 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 197 if (nrow >= rmax) { \ 198 /* there is no extra room in row, therefore enlarge */ \ 199 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 200 MatScalar *new_a; \ 201 \ 202 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 203 \ 204 /* malloc new storage space */ \ 205 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 206 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 207 new_j = (int*)(new_a + bs2*new_nz); \ 208 new_i = new_j + new_nz; \ 209 \ 210 /* copy over old data into new slots */ \ 211 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 212 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 213 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 214 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 215 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 216 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 217 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 218 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 219 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 220 /* free up old matrix storage */ \ 221 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 222 if (!b->singlemalloc) { \ 223 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 224 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 225 } \ 226 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 227 b->singlemalloc = PETSC_TRUE; \ 228 \ 229 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 230 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 231 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 232 b->maxnz += bs2*CHUNKSIZE; \ 233 b->reallocs++; \ 234 b->nz++; \ 235 } \ 236 N = nrow++ - 1; \ 237 /* shift up all the later entries in this row */ \ 238 for (ii=N; ii>=_i; ii--) { \ 239 rp[ii+1] = rp[ii]; \ 240 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 241 } \ 242 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 243 rp[_i] = bcol; \ 244 ap[bs2*_i + bs*cidx + ridx] = value; \ 245 b_noinsert:; \ 246 bilen[brow] = nrow; \ 247 } 248 249 #if defined(PETSC_USE_MAT_SINGLE) 250 #undef __FUNC__ 251 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ" 252 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 253 { 254 int ierr,i,N = m*n; 255 MatScalar *vsingle = (MatScalar*)v; 256 257 PetscFunctionBegin; 258 for (i=0; i<N; i++) { 259 vsingle[i] = v[i]; 260 } 261 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNC__ 266 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ" 267 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 268 { 269 int ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2; 270 MatScalar *vsingle = (MatScalar*)v; 271 272 PetscFunctionBegin; 273 for (i=0; i<N; i++) { 274 vsingle[i] = v[i]; 275 } 276 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 #endif 280 281 #undef __FUNC__ 282 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ" 283 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 284 { 285 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 286 MatScalar value; 287 int ierr,i,j,row,col; 288 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 289 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 290 int cend_orig=baij->cend_bs,bs=baij->bs; 291 292 /* Some Variables required in the macro */ 293 Mat A = baij->A; 294 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 295 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 296 MatScalar *aa=a->a; 297 298 Mat B = baij->B; 299 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 300 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 301 MatScalar *ba=b->a; 302 303 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 304 int low,high,t,ridx,cidx,bs2=a->bs2; 305 MatScalar *ap,*bap; 306 307 PetscFunctionBegin; 308 for (i=0; i<m; i++) { 309 if (im[i] < 0) continue; 310 #if defined(PETSC_USE_BOPT_g) 311 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 312 #endif 313 if (im[i] >= rstart_orig && im[i] < rend_orig) { 314 row = im[i] - rstart_orig; 315 for (j=0; j<n; j++) { 316 if (in[j] >= cstart_orig && in[j] < cend_orig){ 317 col = in[j] - cstart_orig; 318 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 319 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 320 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 321 } else if (in[j] < 0) continue; 322 #if defined(PETSC_USE_BOPT_g) 323 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 324 #endif 325 else { 326 if (mat->was_assembled) { 327 if (!baij->colmap) { 328 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 329 } 330 #if defined (PETSC_USE_CTABLE) 331 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 332 col = col - 1 + in[j]%bs; 333 #else 334 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 335 #endif 336 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 337 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 338 col = in[j]; 339 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 340 B = baij->B; 341 b = (Mat_SeqBAIJ*)(B)->data; 342 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 343 ba=b->a; 344 } 345 } else col = in[j]; 346 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 347 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 348 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 349 } 350 } 351 } else { 352 if (!baij->donotstash) { 353 if (roworiented) { 354 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 355 } else { 356 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 357 } 358 } 359 } 360 } 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNC__ 365 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ" 366 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 367 { 368 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 369 MatScalar *value,*barray=baij->barray; 370 int ierr,i,j,ii,jj,row,col; 371 int roworiented = baij->roworiented,rstart=baij->rstart ; 372 int rend=baij->rend,cstart=baij->cstart,stepval; 373 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 374 375 PetscFunctionBegin; 376 if(!barray) { 377 baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 378 } 379 380 if (roworiented) { 381 stepval = (n-1)*bs; 382 } else { 383 stepval = (m-1)*bs; 384 } 385 for (i=0; i<m; i++) { 386 if (im[i] < 0) continue; 387 #if defined(PETSC_USE_BOPT_g) 388 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 389 #endif 390 if (im[i] >= rstart && im[i] < rend) { 391 row = im[i] - rstart; 392 for (j=0; j<n; j++) { 393 /* If NumCol = 1 then a copy is not required */ 394 if ((roworiented) && (n == 1)) { 395 barray = v + i*bs2; 396 } else if((!roworiented) && (m == 1)) { 397 barray = v + j*bs2; 398 } else { /* Here a copy is required */ 399 if (roworiented) { 400 value = v + i*(stepval+bs)*bs + j*bs; 401 } else { 402 value = v + j*(stepval+bs)*bs + i*bs; 403 } 404 for (ii=0; ii<bs; ii++,value+=stepval) { 405 for (jj=0; jj<bs; jj++) { 406 *barray++ = *value++; 407 } 408 } 409 barray -=bs2; 410 } 411 412 if (in[j] >= cstart && in[j] < cend){ 413 col = in[j] - cstart; 414 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 415 } 416 else if (in[j] < 0) continue; 417 #if defined(PETSC_USE_BOPT_g) 418 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 419 #endif 420 else { 421 if (mat->was_assembled) { 422 if (!baij->colmap) { 423 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 424 } 425 426 #if defined(PETSC_USE_BOPT_g) 427 #if defined (PETSC_USE_CTABLE) 428 { int data; 429 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 430 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 431 } 432 #else 433 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 434 #endif 435 #endif 436 #if defined (PETSC_USE_CTABLE) 437 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 438 col = (col - 1)/bs; 439 #else 440 col = (baij->colmap[in[j]] - 1)/bs; 441 #endif 442 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 443 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 444 col = in[j]; 445 } 446 } 447 else col = in[j]; 448 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 449 } 450 } 451 } else { 452 if (!baij->donotstash) { 453 if (roworiented) { 454 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 455 } else { 456 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 457 } 458 } 459 } 460 } 461 PetscFunctionReturn(0); 462 } 463 #define HASH_KEY 0.6180339887 464 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 465 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 466 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 467 #undef __FUNC__ 468 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT" 469 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 470 { 471 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 472 int ierr,i,j,row,col; 473 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 474 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 475 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 476 PetscReal tmp; 477 MatScalar ** HD = baij->hd,value; 478 #if defined(PETSC_USE_BOPT_g) 479 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 480 #endif 481 482 PetscFunctionBegin; 483 484 for (i=0; i<m; i++) { 485 #if defined(PETSC_USE_BOPT_g) 486 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 487 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 488 #endif 489 row = im[i]; 490 if (row >= rstart_orig && row < rend_orig) { 491 for (j=0; j<n; j++) { 492 col = in[j]; 493 if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m]; 494 /* Look up into the Hash Table */ 495 key = (row/bs)*Nbs+(col/bs)+1; 496 h1 = HASH(size,key,tmp); 497 498 499 idx = h1; 500 #if defined(PETSC_USE_BOPT_g) 501 insert_ct++; 502 total_ct++; 503 if (HT[idx] != key) { 504 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 505 if (idx == size) { 506 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 507 if (idx == h1) { 508 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 509 } 510 } 511 } 512 #else 513 if (HT[idx] != key) { 514 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 515 if (idx == size) { 516 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 517 if (idx == h1) { 518 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 519 } 520 } 521 } 522 #endif 523 /* A HASH table entry is found, so insert the values at the correct address */ 524 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 525 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 526 } 527 } else { 528 if (!baij->donotstash) { 529 if (roworiented) { 530 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 531 } else { 532 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 533 } 534 } 535 } 536 } 537 #if defined(PETSC_USE_BOPT_g) 538 baij->ht_total_ct = total_ct; 539 baij->ht_insert_ct = insert_ct; 540 #endif 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNC__ 545 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 546 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 547 { 548 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 549 int ierr,i,j,ii,jj,row,col; 550 int roworiented = baij->roworiented,rstart=baij->rstart ; 551 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 552 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 553 PetscReal tmp; 554 MatScalar **HD = baij->hd,*baij_a; 555 Scalar *v_t,*value; 556 #if defined(PETSC_USE_BOPT_g) 557 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 558 #endif 559 560 PetscFunctionBegin; 561 562 if (roworiented) { 563 stepval = (n-1)*bs; 564 } else { 565 stepval = (m-1)*bs; 566 } 567 for (i=0; i<m; i++) { 568 #if defined(PETSC_USE_BOPT_g) 569 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 570 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 571 #endif 572 row = im[i]; 573 v_t = v + i*bs2; 574 if (row >= rstart && row < rend) { 575 for (j=0; j<n; j++) { 576 col = in[j]; 577 578 /* Look up into the Hash Table */ 579 key = row*Nbs+col+1; 580 h1 = HASH(size,key,tmp); 581 582 idx = h1; 583 #if defined(PETSC_USE_BOPT_g) 584 total_ct++; 585 insert_ct++; 586 if (HT[idx] != key) { 587 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 588 if (idx == size) { 589 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 590 if (idx == h1) { 591 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 592 } 593 } 594 } 595 #else 596 if (HT[idx] != key) { 597 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 598 if (idx == size) { 599 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 600 if (idx == h1) { 601 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 602 } 603 } 604 } 605 #endif 606 baij_a = HD[idx]; 607 if (roworiented) { 608 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 609 /* value = v + (i*(stepval+bs)+j)*bs; */ 610 value = v_t; 611 v_t += bs; 612 if (addv == ADD_VALUES) { 613 for (ii=0; ii<bs; ii++,value+=stepval) { 614 for (jj=ii; jj<bs2; jj+=bs) { 615 baij_a[jj] += *value++; 616 } 617 } 618 } else { 619 for (ii=0; ii<bs; ii++,value+=stepval) { 620 for (jj=ii; jj<bs2; jj+=bs) { 621 baij_a[jj] = *value++; 622 } 623 } 624 } 625 } else { 626 value = v + j*(stepval+bs)*bs + i*bs; 627 if (addv == ADD_VALUES) { 628 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 629 for (jj=0; jj<bs; jj++) { 630 baij_a[jj] += *value++; 631 } 632 } 633 } else { 634 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 635 for (jj=0; jj<bs; jj++) { 636 baij_a[jj] = *value++; 637 } 638 } 639 } 640 } 641 } 642 } else { 643 if (!baij->donotstash) { 644 if (roworiented) { 645 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 646 } else { 647 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 648 } 649 } 650 } 651 } 652 #if defined(PETSC_USE_BOPT_g) 653 baij->ht_total_ct = total_ct; 654 baij->ht_insert_ct = insert_ct; 655 #endif 656 PetscFunctionReturn(0); 657 } 658 659 #undef __FUNC__ 660 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ" 661 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 662 { 663 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 664 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 665 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 666 667 PetscFunctionBegin; 668 for (i=0; i<m; i++) { 669 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 670 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 671 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 672 row = idxm[i] - bsrstart; 673 for (j=0; j<n; j++) { 674 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 675 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 676 if (idxn[j] >= bscstart && idxn[j] < bscend){ 677 col = idxn[j] - bscstart; 678 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 679 } else { 680 if (!baij->colmap) { 681 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 682 } 683 #if defined (PETSC_USE_CTABLE) 684 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 685 data --; 686 #else 687 data = baij->colmap[idxn[j]/bs]-1; 688 #endif 689 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 690 else { 691 col = data + idxn[j]%bs; 692 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 693 } 694 } 695 } 696 } else { 697 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 698 } 699 } 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNC__ 704 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ" 705 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 706 { 707 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 708 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 709 int ierr,i,bs2=baij->bs2; 710 PetscReal sum = 0.0; 711 MatScalar *v; 712 713 PetscFunctionBegin; 714 if (baij->size == 1) { 715 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 716 } else { 717 if (type == NORM_FROBENIUS) { 718 v = amat->a; 719 for (i=0; i<amat->nz*bs2; i++) { 720 #if defined(PETSC_USE_COMPLEX) 721 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 722 #else 723 sum += (*v)*(*v); v++; 724 #endif 725 } 726 v = bmat->a; 727 for (i=0; i<bmat->nz*bs2; i++) { 728 #if defined(PETSC_USE_COMPLEX) 729 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 730 #else 731 sum += (*v)*(*v); v++; 732 #endif 733 } 734 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 735 *norm = sqrt(*norm); 736 } else { 737 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 738 } 739 } 740 PetscFunctionReturn(0); 741 } 742 743 744 /* 745 Creates the hash table, and sets the table 746 This table is created only once. 747 If new entried need to be added to the matrix 748 then the hash table has to be destroyed and 749 recreated. 750 */ 751 #undef __FUNC__ 752 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 753 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 754 { 755 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 756 Mat A = baij->A,B=baij->B; 757 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 758 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 759 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 760 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 761 int *HT,key; 762 MatScalar **HD; 763 PetscReal tmp; 764 #if defined(PETSC_USE_BOPT_g) 765 int ct=0,max=0; 766 #endif 767 768 PetscFunctionBegin; 769 baij->ht_size=(int)(factor*nz); 770 size = baij->ht_size; 771 772 if (baij->ht) { 773 PetscFunctionReturn(0); 774 } 775 776 /* Allocate Memory for Hash Table */ 777 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 778 baij->ht = (int*)(baij->hd + size); 779 HD = baij->hd; 780 HT = baij->ht; 781 782 783 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 784 785 786 /* Loop Over A */ 787 for (i=0; i<a->mbs; i++) { 788 for (j=ai[i]; j<ai[i+1]; j++) { 789 row = i+rstart; 790 col = aj[j]+cstart; 791 792 key = row*Nbs + col + 1; 793 h1 = HASH(size,key,tmp); 794 for (k=0; k<size; k++){ 795 if (HT[(h1+k)%size] == 0.0) { 796 HT[(h1+k)%size] = key; 797 HD[(h1+k)%size] = a->a + j*bs2; 798 break; 799 #if defined(PETSC_USE_BOPT_g) 800 } else { 801 ct++; 802 #endif 803 } 804 } 805 #if defined(PETSC_USE_BOPT_g) 806 if (k> max) max = k; 807 #endif 808 } 809 } 810 /* Loop Over B */ 811 for (i=0; i<b->mbs; i++) { 812 for (j=bi[i]; j<bi[i+1]; j++) { 813 row = i+rstart; 814 col = garray[bj[j]]; 815 key = row*Nbs + col + 1; 816 h1 = HASH(size,key,tmp); 817 for (k=0; k<size; k++){ 818 if (HT[(h1+k)%size] == 0.0) { 819 HT[(h1+k)%size] = key; 820 HD[(h1+k)%size] = b->a + j*bs2; 821 break; 822 #if defined(PETSC_USE_BOPT_g) 823 } else { 824 ct++; 825 #endif 826 } 827 } 828 #if defined(PETSC_USE_BOPT_g) 829 if (k> max) max = k; 830 #endif 831 } 832 } 833 834 /* Print Summary */ 835 #if defined(PETSC_USE_BOPT_g) 836 for (i=0,j=0; i<size; i++) { 837 if (HT[i]) {j++;} 838 } 839 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 840 #endif 841 PetscFunctionReturn(0); 842 } 843 844 #undef __FUNC__ 845 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 846 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 847 { 848 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 849 int ierr,nstash,reallocs; 850 InsertMode addv; 851 852 PetscFunctionBegin; 853 if (baij->donotstash) { 854 PetscFunctionReturn(0); 855 } 856 857 /* make sure all processors are either in INSERTMODE or ADDMODE */ 858 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 859 if (addv == (ADD_VALUES|INSERT_VALUES)) { 860 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 861 } 862 mat->insertmode = addv; /* in case this processor had no cache */ 863 864 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 865 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 866 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 867 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 868 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 869 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNC__ 874 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 875 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 876 { 877 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 878 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 879 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 880 int *row,*col,other_disassembled; 881 PetscTruth r1,r2,r3; 882 MatScalar *val; 883 InsertMode addv = mat->insertmode; 884 885 PetscFunctionBegin; 886 if (!baij->donotstash) { 887 while (1) { 888 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 889 if (!flg) break; 890 891 for (i=0; i<n;) { 892 /* Now identify the consecutive vals belonging to the same row */ 893 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 894 if (j < n) ncols = j-i; 895 else ncols = n-i; 896 /* Now assemble all these values with a single function call */ 897 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 898 i = j; 899 } 900 } 901 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 902 /* Now process the block-stash. Since the values are stashed column-oriented, 903 set the roworiented flag to column oriented, and after MatSetValues() 904 restore the original flags */ 905 r1 = baij->roworiented; 906 r2 = a->roworiented; 907 r3 = b->roworiented; 908 baij->roworiented = PETSC_FALSE; 909 a->roworiented = PETSC_FALSE; 910 b->roworiented = PETSC_FALSE; 911 while (1) { 912 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 913 if (!flg) break; 914 915 for (i=0; i<n;) { 916 /* Now identify the consecutive vals belonging to the same row */ 917 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 918 if (j < n) ncols = j-i; 919 else ncols = n-i; 920 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 921 i = j; 922 } 923 } 924 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 925 baij->roworiented = r1; 926 a->roworiented = r2; 927 b->roworiented = r3; 928 } 929 930 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 931 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 932 933 /* determine if any processor has disassembled, if so we must 934 also disassemble ourselfs, in order that we may reassemble. */ 935 /* 936 if nonzero structure of submatrix B cannot change then we know that 937 no processor disassembled thus we can skip this stuff 938 */ 939 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 940 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 941 if (mat->was_assembled && !other_disassembled) { 942 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 943 } 944 } 945 946 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 947 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 948 } 949 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 950 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 951 952 #if defined(PETSC_USE_BOPT_g) 953 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 954 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 955 baij->ht_total_ct = 0; 956 baij->ht_insert_ct = 0; 957 } 958 #endif 959 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 960 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 961 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 962 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 963 } 964 965 if (baij->rowvalues) { 966 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 967 baij->rowvalues = 0; 968 } 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNC__ 973 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_Binary" 974 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 975 { 976 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 977 int ierr; 978 979 PetscFunctionBegin; 980 if (baij->size == 1) { 981 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 982 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNC__ 987 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket" 988 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 989 { 990 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 991 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 992 PetscTruth isascii,isdraw; 993 994 PetscFunctionBegin; 995 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 996 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 997 if (isascii) { 998 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 999 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1000 MatInfo info; 1001 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1002 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1003 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1004 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1005 baij->bs,(int)info.memory);CHKERRQ(ierr); 1006 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1007 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1008 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1009 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1010 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 1011 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1014 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 } 1018 1019 if (isdraw) { 1020 Draw draw; 1021 PetscTruth isnull; 1022 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1023 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1024 } 1025 1026 if (size == 1) { 1027 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1028 } else { 1029 /* assemble the entire matrix onto first processor. */ 1030 Mat A; 1031 Mat_SeqBAIJ *Aloc; 1032 int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1033 MatScalar *a; 1034 1035 if (!rank) { 1036 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1037 } else { 1038 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1039 } 1040 PLogObjectParent(mat,A); 1041 1042 /* copy over the A part */ 1043 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1044 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1045 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1046 1047 for (i=0; i<mbs; i++) { 1048 rvals[0] = bs*(baij->rstart + i); 1049 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1050 for (j=ai[i]; j<ai[i+1]; j++) { 1051 col = (baij->cstart+aj[j])*bs; 1052 for (k=0; k<bs; k++) { 1053 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1054 col++; a += bs; 1055 } 1056 } 1057 } 1058 /* copy over the B part */ 1059 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1060 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1061 for (i=0; i<mbs; i++) { 1062 rvals[0] = bs*(baij->rstart + i); 1063 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1064 for (j=ai[i]; j<ai[i+1]; j++) { 1065 col = baij->garray[aj[j]]*bs; 1066 for (k=0; k<bs; k++) { 1067 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1068 col++; a += bs; 1069 } 1070 } 1071 } 1072 ierr = PetscFree(rvals);CHKERRQ(ierr); 1073 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1074 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1075 /* 1076 Everyone has to call to draw the matrix since the graphics waits are 1077 synchronized across all processors that share the Draw object 1078 */ 1079 if (!rank) { 1080 Viewer sviewer; 1081 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1082 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1083 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1084 } 1085 ierr = MatDestroy(A);CHKERRQ(ierr); 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 1091 1092 #undef __FUNC__ 1093 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ" 1094 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1095 { 1096 int ierr; 1097 PetscTruth isascii,isdraw,issocket,isbinary; 1098 1099 PetscFunctionBegin; 1100 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1101 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1102 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1103 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1104 if (isascii || isdraw || issocket || isbinary) { 1105 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1106 } else { 1107 SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1108 } 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNC__ 1113 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ" 1114 int MatDestroy_MPIBAIJ(Mat mat) 1115 { 1116 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1117 int ierr; 1118 1119 PetscFunctionBegin; 1120 1121 if (mat->mapping) { 1122 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1123 } 1124 if (mat->bmapping) { 1125 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1126 } 1127 if (mat->rmap) { 1128 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1129 } 1130 if (mat->cmap) { 1131 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1132 } 1133 #if defined(PETSC_USE_LOG) 1134 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 1135 #endif 1136 1137 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1138 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1139 1140 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1141 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1142 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1143 #if defined (PETSC_USE_CTABLE) 1144 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1145 #else 1146 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1147 #endif 1148 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1149 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1150 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1151 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1152 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1153 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1154 ierr = PetscFree(baij);CHKERRQ(ierr); 1155 PLogObjectDestroy(mat); 1156 PetscHeaderDestroy(mat); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNC__ 1161 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1162 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1163 { 1164 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1165 int ierr,nt; 1166 1167 PetscFunctionBegin; 1168 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1169 if (nt != a->n) { 1170 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1171 } 1172 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1173 if (nt != a->m) { 1174 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1175 } 1176 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1177 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1178 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1179 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1180 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNC__ 1185 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1186 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1187 { 1188 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1189 int ierr; 1190 1191 PetscFunctionBegin; 1192 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1193 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1194 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1195 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNC__ 1200 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ" 1201 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1202 { 1203 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1204 int ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1213 /* receive remote parts: note this assumes the values are not actually */ 1214 /* inserted in yy until the next line, which is true for my implementation*/ 1215 /* but is not perhaps always true. */ 1216 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNC__ 1221 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ" 1222 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1223 { 1224 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1225 int ierr; 1226 1227 PetscFunctionBegin; 1228 /* do nondiagonal part */ 1229 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1230 /* send it on its way */ 1231 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1232 /* do local part */ 1233 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1234 /* receive remote parts: note this assumes the values are not actually */ 1235 /* inserted in yy until the next line, which is true for my implementation*/ 1236 /* but is not perhaps always true. */ 1237 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 /* 1242 This only works correctly for square matrices where the subblock A->A is the 1243 diagonal block 1244 */ 1245 #undef __FUNC__ 1246 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ" 1247 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1248 { 1249 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1250 int ierr; 1251 1252 PetscFunctionBegin; 1253 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1254 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNC__ 1259 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ" 1260 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1261 { 1262 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1263 int ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1267 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNC__ 1272 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ" 1273 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1274 { 1275 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1276 1277 PetscFunctionBegin; 1278 if (m) *m = mat->M; 1279 if (n) *n = mat->N; 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNC__ 1284 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ" 1285 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1286 { 1287 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1288 1289 PetscFunctionBegin; 1290 *m = mat->m; *n = mat->n; 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #undef __FUNC__ 1295 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ" 1296 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1297 { 1298 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1299 1300 PetscFunctionBegin; 1301 if (m) *m = mat->rstart*mat->bs; 1302 if (n) *n = mat->rend*mat->bs; 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNC__ 1307 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ" 1308 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1309 { 1310 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1311 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1312 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1313 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1314 int *cmap,*idx_p,cstart = mat->cstart; 1315 1316 PetscFunctionBegin; 1317 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1318 mat->getrowactive = PETSC_TRUE; 1319 1320 if (!mat->rowvalues && (idx || v)) { 1321 /* 1322 allocate enough space to hold information from the longest row. 1323 */ 1324 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1325 int max = 1,mbs = mat->mbs,tmp; 1326 for (i=0; i<mbs; i++) { 1327 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1328 if (max < tmp) { max = tmp; } 1329 } 1330 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1331 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1332 } 1333 1334 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1335 lrow = row - brstart; 1336 1337 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1338 if (!v) {pvA = 0; pvB = 0;} 1339 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1340 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1341 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1342 nztot = nzA + nzB; 1343 1344 cmap = mat->garray; 1345 if (v || idx) { 1346 if (nztot) { 1347 /* Sort by increasing column numbers, assuming A and B already sorted */ 1348 int imark = -1; 1349 if (v) { 1350 *v = v_p = mat->rowvalues; 1351 for (i=0; i<nzB; i++) { 1352 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1353 else break; 1354 } 1355 imark = i; 1356 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1357 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1358 } 1359 if (idx) { 1360 *idx = idx_p = mat->rowindices; 1361 if (imark > -1) { 1362 for (i=0; i<imark; i++) { 1363 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1364 } 1365 } else { 1366 for (i=0; i<nzB; i++) { 1367 if (cmap[cworkB[i]/bs] < cstart) 1368 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1369 else break; 1370 } 1371 imark = i; 1372 } 1373 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1374 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1375 } 1376 } else { 1377 if (idx) *idx = 0; 1378 if (v) *v = 0; 1379 } 1380 } 1381 *nz = nztot; 1382 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1383 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNC__ 1388 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ" 1389 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1390 { 1391 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1392 1393 PetscFunctionBegin; 1394 if (baij->getrowactive == PETSC_FALSE) { 1395 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1396 } 1397 baij->getrowactive = PETSC_FALSE; 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNC__ 1402 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ" 1403 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1404 { 1405 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1406 1407 PetscFunctionBegin; 1408 *bs = baij->bs; 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNC__ 1413 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ" 1414 int MatZeroEntries_MPIBAIJ(Mat A) 1415 { 1416 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1417 int ierr; 1418 1419 PetscFunctionBegin; 1420 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1421 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNC__ 1426 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ" 1427 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1428 { 1429 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1430 Mat A = a->A,B = a->B; 1431 int ierr; 1432 PetscReal isend[5],irecv[5]; 1433 1434 PetscFunctionBegin; 1435 info->block_size = (double)a->bs; 1436 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1437 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1438 isend[3] = info->memory; isend[4] = info->mallocs; 1439 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1440 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1441 isend[3] += info->memory; isend[4] += info->mallocs; 1442 if (flag == MAT_LOCAL) { 1443 info->nz_used = isend[0]; 1444 info->nz_allocated = isend[1]; 1445 info->nz_unneeded = isend[2]; 1446 info->memory = isend[3]; 1447 info->mallocs = isend[4]; 1448 } else if (flag == MAT_GLOBAL_MAX) { 1449 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1450 info->nz_used = irecv[0]; 1451 info->nz_allocated = irecv[1]; 1452 info->nz_unneeded = irecv[2]; 1453 info->memory = irecv[3]; 1454 info->mallocs = irecv[4]; 1455 } else if (flag == MAT_GLOBAL_SUM) { 1456 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1457 info->nz_used = irecv[0]; 1458 info->nz_allocated = irecv[1]; 1459 info->nz_unneeded = irecv[2]; 1460 info->memory = irecv[3]; 1461 info->mallocs = irecv[4]; 1462 } else { 1463 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1464 } 1465 info->rows_global = (double)a->M; 1466 info->columns_global = (double)a->N; 1467 info->rows_local = (double)a->m; 1468 info->columns_local = (double)a->N; 1469 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1470 info->fill_ratio_needed = 0; 1471 info->factor_mallocs = 0; 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNC__ 1476 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1477 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1478 { 1479 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1480 int ierr; 1481 1482 PetscFunctionBegin; 1483 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1484 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1485 op == MAT_COLUMNS_UNSORTED || 1486 op == MAT_COLUMNS_SORTED || 1487 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1488 op == MAT_KEEP_ZEROED_ROWS || 1489 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1490 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1491 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1492 } else if (op == MAT_ROW_ORIENTED) { 1493 a->roworiented = PETSC_TRUE; 1494 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1495 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1496 } else if (op == MAT_ROWS_SORTED || 1497 op == MAT_ROWS_UNSORTED || 1498 op == MAT_SYMMETRIC || 1499 op == MAT_STRUCTURALLY_SYMMETRIC || 1500 op == MAT_YES_NEW_DIAGONALS || 1501 op == MAT_USE_HASH_TABLE) { 1502 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1503 } else if (op == MAT_COLUMN_ORIENTED) { 1504 a->roworiented = PETSC_FALSE; 1505 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1506 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1507 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1508 a->donotstash = PETSC_TRUE; 1509 } else if (op == MAT_NO_NEW_DIAGONALS) { 1510 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1511 } else if (op == MAT_USE_HASH_TABLE) { 1512 a->ht_flag = PETSC_TRUE; 1513 } else { 1514 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1515 } 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #undef __FUNC__ 1520 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1521 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1522 { 1523 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1524 Mat_SeqBAIJ *Aloc; 1525 Mat B; 1526 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1527 int bs=baij->bs,mbs=baij->mbs; 1528 MatScalar *a; 1529 1530 PetscFunctionBegin; 1531 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1532 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1533 1534 /* copy over the A part */ 1535 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1536 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1537 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1538 1539 for (i=0; i<mbs; i++) { 1540 rvals[0] = bs*(baij->rstart + i); 1541 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1542 for (j=ai[i]; j<ai[i+1]; j++) { 1543 col = (baij->cstart+aj[j])*bs; 1544 for (k=0; k<bs; k++) { 1545 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1546 col++; a += bs; 1547 } 1548 } 1549 } 1550 /* copy over the B part */ 1551 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1552 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1553 for (i=0; i<mbs; i++) { 1554 rvals[0] = bs*(baij->rstart + i); 1555 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1556 for (j=ai[i]; j<ai[i+1]; j++) { 1557 col = baij->garray[aj[j]]*bs; 1558 for (k=0; k<bs; k++) { 1559 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1560 col++; a += bs; 1561 } 1562 } 1563 } 1564 ierr = PetscFree(rvals);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 1568 if (matout) { 1569 *matout = B; 1570 } else { 1571 PetscOps *Abops; 1572 MatOps Aops; 1573 1574 /* This isn't really an in-place transpose .... but free data structures from baij */ 1575 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1576 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1577 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1578 #if defined (PETSC_USE_CTABLE) 1579 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1580 #else 1581 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1582 #endif 1583 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1584 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1585 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1586 ierr = PetscFree(baij);CHKERRQ(ierr); 1587 1588 /* 1589 This is horrible, horrible code. We need to keep the 1590 A pointers for the bops and ops but copy everything 1591 else from C. 1592 */ 1593 Abops = A->bops; 1594 Aops = A->ops; 1595 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1596 A->bops = Abops; 1597 A->ops = Aops; 1598 1599 PetscHeaderDestroy(B); 1600 } 1601 PetscFunctionReturn(0); 1602 } 1603 1604 #undef __FUNC__ 1605 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ" 1606 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1607 { 1608 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1609 Mat a = baij->A,b = baij->B; 1610 int ierr,s1,s2,s3; 1611 1612 PetscFunctionBegin; 1613 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1614 if (rr) { 1615 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1616 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1617 /* Overlap communication with computation. */ 1618 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1619 } 1620 if (ll) { 1621 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1622 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1623 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1624 } 1625 /* scale the diagonal block */ 1626 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1627 1628 if (rr) { 1629 /* Do a scatter end and then right scale the off-diagonal block */ 1630 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1631 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1632 } 1633 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNC__ 1638 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ" 1639 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1640 { 1641 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1642 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1643 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1644 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1645 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1646 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1647 MPI_Comm comm = A->comm; 1648 MPI_Request *send_waits,*recv_waits; 1649 MPI_Status recv_status,*send_status; 1650 IS istmp; 1651 1652 PetscFunctionBegin; 1653 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1654 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1655 1656 /* first count number of contributors to each processor */ 1657 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1658 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1659 procs = nprocs + size; 1660 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1661 for (i=0; i<N; i++) { 1662 idx = rows[i]; 1663 found = 0; 1664 for (j=0; j<size; j++) { 1665 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1666 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1667 } 1668 } 1669 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1670 } 1671 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1672 1673 /* inform other processors of number of messages and max length*/ 1674 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1675 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1676 nmax = work[rank]; 1677 nrecvs = work[size+rank]; 1678 ierr = PetscFree(work);CHKERRQ(ierr); 1679 1680 /* post receives: */ 1681 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1682 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1683 for (i=0; i<nrecvs; i++) { 1684 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1685 } 1686 1687 /* do sends: 1688 1) starts[i] gives the starting index in svalues for stuff going to 1689 the ith processor 1690 */ 1691 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1692 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1693 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1694 starts[0] = 0; 1695 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1696 for (i=0; i<N; i++) { 1697 svalues[starts[owner[i]]++] = rows[i]; 1698 } 1699 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1700 1701 starts[0] = 0; 1702 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1703 count = 0; 1704 for (i=0; i<size; i++) { 1705 if (procs[i]) { 1706 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1707 } 1708 } 1709 ierr = PetscFree(starts);CHKERRQ(ierr); 1710 1711 base = owners[rank]*bs; 1712 1713 /* wait on receives */ 1714 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1715 source = lens + nrecvs; 1716 count = nrecvs; slen = 0; 1717 while (count) { 1718 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1719 /* unpack receives into our local space */ 1720 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1721 source[imdex] = recv_status.MPI_SOURCE; 1722 lens[imdex] = n; 1723 slen += n; 1724 count--; 1725 } 1726 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1727 1728 /* move the data into the send scatter */ 1729 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1730 count = 0; 1731 for (i=0; i<nrecvs; i++) { 1732 values = rvalues + i*nmax; 1733 for (j=0; j<lens[i]; j++) { 1734 lrows[count++] = values[j] - base; 1735 } 1736 } 1737 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1738 ierr = PetscFree(lens);CHKERRQ(ierr); 1739 ierr = PetscFree(owner);CHKERRQ(ierr); 1740 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1741 1742 /* actually zap the local rows */ 1743 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1744 PLogObjectParent(A,istmp); 1745 1746 /* 1747 Zero the required rows. If the "diagonal block" of the matrix 1748 is square and the user wishes to set the diagonal we use seperate 1749 code so that MatSetValues() is not called for each diagonal allocating 1750 new memory, thus calling lots of mallocs and slowing things down. 1751 1752 Contributed by: Mathew Knepley 1753 */ 1754 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1755 ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr); 1756 if (diag && (l->A->M == l->A->N)) { 1757 ierr = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1758 } else if (diag) { 1759 ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1760 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1761 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1762 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1763 } 1764 for (i = 0; i < slen; i++) { 1765 row = lrows[i] + rstart_bs; 1766 ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1767 } 1768 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1769 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1770 } else { 1771 ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1772 } 1773 1774 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1775 ierr = PetscFree(lrows);CHKERRQ(ierr); 1776 1777 /* wait on sends */ 1778 if (nsends) { 1779 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1780 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1781 ierr = PetscFree(send_status);CHKERRQ(ierr); 1782 } 1783 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1784 ierr = PetscFree(svalues);CHKERRQ(ierr); 1785 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNC__ 1790 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ" 1791 int MatPrintHelp_MPIBAIJ(Mat A) 1792 { 1793 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1794 MPI_Comm comm = A->comm; 1795 static int called = 0; 1796 int ierr; 1797 1798 PetscFunctionBegin; 1799 if (!a->rank) { 1800 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1801 } 1802 if (called) {PetscFunctionReturn(0);} else called = 1; 1803 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1804 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNC__ 1809 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1810 int MatSetUnfactored_MPIBAIJ(Mat A) 1811 { 1812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1813 int ierr; 1814 1815 PetscFunctionBegin; 1816 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1817 PetscFunctionReturn(0); 1818 } 1819 1820 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1821 1822 #undef __FUNC__ 1823 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 1824 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1825 { 1826 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1827 Mat a,b,c,d; 1828 PetscTruth flg; 1829 int ierr; 1830 1831 PetscFunctionBegin; 1832 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1833 a = matA->A; b = matA->B; 1834 c = matB->A; d = matB->B; 1835 1836 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1837 if (flg == PETSC_TRUE) { 1838 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1839 } 1840 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 /* -------------------------------------------------------------------*/ 1845 static struct _MatOps MatOps_Values = { 1846 MatSetValues_MPIBAIJ, 1847 MatGetRow_MPIBAIJ, 1848 MatRestoreRow_MPIBAIJ, 1849 MatMult_MPIBAIJ, 1850 MatMultAdd_MPIBAIJ, 1851 MatMultTranspose_MPIBAIJ, 1852 MatMultTransposeAdd_MPIBAIJ, 1853 0, 1854 0, 1855 0, 1856 0, 1857 0, 1858 0, 1859 0, 1860 MatTranspose_MPIBAIJ, 1861 MatGetInfo_MPIBAIJ, 1862 MatEqual_MPIBAIJ, 1863 MatGetDiagonal_MPIBAIJ, 1864 MatDiagonalScale_MPIBAIJ, 1865 MatNorm_MPIBAIJ, 1866 MatAssemblyBegin_MPIBAIJ, 1867 MatAssemblyEnd_MPIBAIJ, 1868 0, 1869 MatSetOption_MPIBAIJ, 1870 MatZeroEntries_MPIBAIJ, 1871 MatZeroRows_MPIBAIJ, 1872 0, 1873 0, 1874 0, 1875 0, 1876 MatGetSize_MPIBAIJ, 1877 MatGetLocalSize_MPIBAIJ, 1878 MatGetOwnershipRange_MPIBAIJ, 1879 0, 1880 0, 1881 0, 1882 0, 1883 MatDuplicate_MPIBAIJ, 1884 0, 1885 0, 1886 0, 1887 0, 1888 0, 1889 MatGetSubMatrices_MPIBAIJ, 1890 MatIncreaseOverlap_MPIBAIJ, 1891 MatGetValues_MPIBAIJ, 1892 0, 1893 MatPrintHelp_MPIBAIJ, 1894 MatScale_MPIBAIJ, 1895 0, 1896 0, 1897 0, 1898 MatGetBlockSize_MPIBAIJ, 1899 0, 1900 0, 1901 0, 1902 0, 1903 0, 1904 0, 1905 MatSetUnfactored_MPIBAIJ, 1906 0, 1907 MatSetValuesBlocked_MPIBAIJ, 1908 0, 1909 0, 1910 0, 1911 MatGetMaps_Petsc}; 1912 1913 1914 EXTERN_C_BEGIN 1915 #undef __FUNC__ 1916 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 1917 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1918 { 1919 PetscFunctionBegin; 1920 *a = ((Mat_MPIBAIJ *)A->data)->A; 1921 *iscopy = PETSC_FALSE; 1922 PetscFunctionReturn(0); 1923 } 1924 EXTERN_C_END 1925 1926 #undef __FUNC__ 1927 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 1928 /*@C 1929 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1930 (block compressed row). For good matrix assembly performance 1931 the user should preallocate the matrix storage by setting the parameters 1932 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1933 performance can be increased by more than a factor of 50. 1934 1935 Collective on MPI_Comm 1936 1937 Input Parameters: 1938 + comm - MPI communicator 1939 . bs - size of blockk 1940 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1941 This value should be the same as the local size used in creating the 1942 y vector for the matrix-vector product y = Ax. 1943 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1944 This value should be the same as the local size used in creating the 1945 x vector for the matrix-vector product y = Ax. 1946 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1947 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1948 . d_nz - number of block nonzeros per block row in diagonal portion of local 1949 submatrix (same for all local rows) 1950 . d_nnz - array containing the number of block nonzeros in the various block rows 1951 of the in diagonal portion of the local (possibly different for each block 1952 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1953 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1954 submatrix (same for all local rows). 1955 - o_nnz - array containing the number of nonzeros in the various block rows of the 1956 off-diagonal portion of the local submatrix (possibly different for 1957 each block row) or PETSC_NULL. 1958 1959 Output Parameter: 1960 . A - the matrix 1961 1962 Options Database Keys: 1963 . -mat_no_unroll - uses code that does not unroll the loops in the 1964 block calculations (much slower) 1965 . -mat_block_size - size of the blocks to use 1966 . -mat_mpi - use the parallel matrix data structures even on one processor 1967 (defaults to using SeqBAIJ format on one processor) 1968 1969 Notes: 1970 The user MUST specify either the local or global matrix dimensions 1971 (possibly both). 1972 1973 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1974 than it must be used on all processors that share the object for that argument. 1975 1976 Storage Information: 1977 For a square global matrix we define each processor's diagonal portion 1978 to be its local rows and the corresponding columns (a square submatrix); 1979 each processor's off-diagonal portion encompasses the remainder of the 1980 local matrix (a rectangular submatrix). 1981 1982 The user can specify preallocated storage for the diagonal part of 1983 the local submatrix with either d_nz or d_nnz (not both). Set 1984 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1985 memory allocation. Likewise, specify preallocated storage for the 1986 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1987 1988 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1989 the figure below we depict these three local rows and all columns (0-11). 1990 1991 .vb 1992 0 1 2 3 4 5 6 7 8 9 10 11 1993 ------------------- 1994 row 3 | o o o d d d o o o o o o 1995 row 4 | o o o d d d o o o o o o 1996 row 5 | o o o d d d o o o o o o 1997 ------------------- 1998 .ve 1999 2000 Thus, any entries in the d locations are stored in the d (diagonal) 2001 submatrix, and any entries in the o locations are stored in the 2002 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2003 stored simply in the MATSEQBAIJ format for compressed row storage. 2004 2005 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2006 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2007 In general, for PDE problems in which most nonzeros are near the diagonal, 2008 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2009 or you will get TERRIBLE performance; see the users' manual chapter on 2010 matrices. 2011 2012 Level: intermediate 2013 2014 .keywords: matrix, block, aij, compressed row, sparse, parallel 2015 2016 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2017 @*/ 2018 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2019 { 2020 Mat B; 2021 Mat_MPIBAIJ *b; 2022 int ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 2023 PetscTruth flag1,flag2,flg; 2024 2025 PetscFunctionBegin; 2026 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2027 2028 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2029 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 2030 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 2031 if (d_nnz) { 2032 for (i=0; i<m/bs; i++) { 2033 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2034 } 2035 } 2036 if (o_nnz) { 2037 for (i=0; i<m/bs; i++) { 2038 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2039 } 2040 } 2041 2042 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2043 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2044 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2045 if (!flag1 && !flag2 && size == 1) { 2046 if (M == PETSC_DECIDE) M = m; 2047 if (N == PETSC_DECIDE) N = n; 2048 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2049 PetscFunctionReturn(0); 2050 } 2051 2052 *A = 0; 2053 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2054 PLogObjectCreate(B); 2055 B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2056 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2057 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2058 2059 B->ops->destroy = MatDestroy_MPIBAIJ; 2060 B->ops->view = MatView_MPIBAIJ; 2061 B->mapping = 0; 2062 B->factor = 0; 2063 B->assembled = PETSC_FALSE; 2064 2065 B->insertmode = NOT_SET_VALUES; 2066 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2067 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2068 2069 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2070 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2071 } 2072 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2073 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2074 } 2075 if (N == PETSC_DECIDE && n == PETSC_DECIDE) { 2076 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2077 } 2078 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2079 if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2080 2081 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2082 work[0] = m; work[1] = n; 2083 mbs = m/bs; nbs = n/bs; 2084 ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2085 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2086 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2087 } 2088 if (m == PETSC_DECIDE) { 2089 Mbs = M/bs; 2090 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2091 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2092 m = mbs*bs; 2093 } 2094 if (n == PETSC_DECIDE) { 2095 Nbs = N/bs; 2096 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2097 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2098 n = nbs*bs; 2099 } 2100 if (mbs*bs != m || nbs*bs != n) { 2101 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2102 } 2103 2104 b->m = m; B->m = m; 2105 b->n = n; B->n = n; 2106 b->N = N; B->N = N; 2107 b->M = M; B->M = M; 2108 b->bs = bs; 2109 b->bs2 = bs*bs; 2110 b->mbs = mbs; 2111 b->nbs = nbs; 2112 b->Mbs = Mbs; 2113 b->Nbs = Nbs; 2114 2115 /* the information in the maps duplicates the information computed below, eventually 2116 we should remove the duplicate information that is not contained in the maps */ 2117 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2118 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2119 2120 /* build local table of row and column ownerships */ 2121 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2122 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2123 b->cowners = b->rowners + b->size + 2; 2124 b->rowners_bs = b->cowners + b->size + 2; 2125 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2126 b->rowners[0] = 0; 2127 for (i=2; i<=b->size; i++) { 2128 b->rowners[i] += b->rowners[i-1]; 2129 } 2130 for (i=0; i<=b->size; i++) { 2131 b->rowners_bs[i] = b->rowners[i]*bs; 2132 } 2133 b->rstart = b->rowners[b->rank]; 2134 b->rend = b->rowners[b->rank+1]; 2135 b->rstart_bs = b->rstart * bs; 2136 b->rend_bs = b->rend * bs; 2137 2138 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2139 b->cowners[0] = 0; 2140 for (i=2; i<=b->size; i++) { 2141 b->cowners[i] += b->cowners[i-1]; 2142 } 2143 b->cstart = b->cowners[b->rank]; 2144 b->cend = b->cowners[b->rank+1]; 2145 b->cstart_bs = b->cstart * bs; 2146 b->cend_bs = b->cend * bs; 2147 2148 2149 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2150 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2151 PLogObjectParent(B,b->A); 2152 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2153 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2154 PLogObjectParent(B,b->B); 2155 2156 /* build cache for off array entries formed */ 2157 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2158 ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2159 b->donotstash = PETSC_FALSE; 2160 b->colmap = PETSC_NULL; 2161 b->garray = PETSC_NULL; 2162 b->roworiented = PETSC_TRUE; 2163 2164 /* stuff used in block assembly */ 2165 b->barray = 0; 2166 2167 /* stuff used for matrix vector multiply */ 2168 b->lvec = 0; 2169 b->Mvctx = 0; 2170 2171 /* stuff for MatGetRow() */ 2172 b->rowindices = 0; 2173 b->rowvalues = 0; 2174 b->getrowactive = PETSC_FALSE; 2175 2176 /* hash table stuff */ 2177 b->ht = 0; 2178 b->hd = 0; 2179 b->ht_size = 0; 2180 b->ht_flag = PETSC_FALSE; 2181 b->ht_fact = 0; 2182 b->ht_total_ct = 0; 2183 b->ht_insert_ct = 0; 2184 2185 *A = B; 2186 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2187 if (flg) { 2188 double fact = 1.39; 2189 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2190 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2191 if (fact <= 1.0) fact = 1.39; 2192 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2193 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2194 } 2195 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2196 "MatStoreValues_MPIBAIJ", 2197 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2198 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2199 "MatRetrieveValues_MPIBAIJ", 2200 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2201 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2202 "MatGetDiagonalBlock_MPIBAIJ", 2203 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 #undef __FUNC__ 2208 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 2209 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2210 { 2211 Mat mat; 2212 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2213 int ierr,len=0; 2214 PetscTruth flg; 2215 2216 PetscFunctionBegin; 2217 *newmat = 0; 2218 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2219 PLogObjectCreate(mat); 2220 mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2221 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2222 mat->ops->destroy = MatDestroy_MPIBAIJ; 2223 mat->ops->view = MatView_MPIBAIJ; 2224 mat->factor = matin->factor; 2225 mat->assembled = PETSC_TRUE; 2226 mat->insertmode = NOT_SET_VALUES; 2227 2228 a->m = mat->m = oldmat->m; 2229 a->n = mat->n = oldmat->n; 2230 a->M = mat->M = oldmat->M; 2231 a->N = mat->N = oldmat->N; 2232 2233 a->bs = oldmat->bs; 2234 a->bs2 = oldmat->bs2; 2235 a->mbs = oldmat->mbs; 2236 a->nbs = oldmat->nbs; 2237 a->Mbs = oldmat->Mbs; 2238 a->Nbs = oldmat->Nbs; 2239 2240 a->rstart = oldmat->rstart; 2241 a->rend = oldmat->rend; 2242 a->cstart = oldmat->cstart; 2243 a->cend = oldmat->cend; 2244 a->size = oldmat->size; 2245 a->rank = oldmat->rank; 2246 a->donotstash = oldmat->donotstash; 2247 a->roworiented = oldmat->roworiented; 2248 a->rowindices = 0; 2249 a->rowvalues = 0; 2250 a->getrowactive = PETSC_FALSE; 2251 a->barray = 0; 2252 a->rstart_bs = oldmat->rstart_bs; 2253 a->rend_bs = oldmat->rend_bs; 2254 a->cstart_bs = oldmat->cstart_bs; 2255 a->cend_bs = oldmat->cend_bs; 2256 2257 /* hash table stuff */ 2258 a->ht = 0; 2259 a->hd = 0; 2260 a->ht_size = 0; 2261 a->ht_flag = oldmat->ht_flag; 2262 a->ht_fact = oldmat->ht_fact; 2263 a->ht_total_ct = 0; 2264 a->ht_insert_ct = 0; 2265 2266 2267 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2268 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2269 a->cowners = a->rowners + a->size + 2; 2270 a->rowners_bs = a->cowners + a->size + 2; 2271 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2272 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2273 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2274 if (oldmat->colmap) { 2275 #if defined (PETSC_USE_CTABLE) 2276 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2277 #else 2278 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2279 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2280 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2281 #endif 2282 } else a->colmap = 0; 2283 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2284 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2285 PLogObjectMemory(mat,len*sizeof(int)); 2286 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2287 } else a->garray = 0; 2288 2289 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2290 PLogObjectParent(mat,a->lvec); 2291 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2292 2293 PLogObjectParent(mat,a->Mvctx); 2294 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2295 PLogObjectParent(mat,a->A); 2296 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2297 PLogObjectParent(mat,a->B); 2298 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2299 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2300 if (flg) { 2301 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2302 } 2303 *newmat = mat; 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #include "sys.h" 2308 2309 #undef __FUNC__ 2310 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 2311 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2312 { 2313 Mat A; 2314 int i,nz,ierr,j,rstart,rend,fd; 2315 Scalar *vals,*buf; 2316 MPI_Comm comm = ((PetscObject)viewer)->comm; 2317 MPI_Status status; 2318 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2319 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2320 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2321 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2322 int dcount,kmax,k,nzcount,tmp; 2323 2324 PetscFunctionBegin; 2325 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2326 2327 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2328 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2329 if (!rank) { 2330 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2331 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2332 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2333 if (header[3] < 0) { 2334 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2335 } 2336 } 2337 2338 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2339 M = header[1]; N = header[2]; 2340 2341 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2342 2343 /* 2344 This code adds extra rows to make sure the number of rows is 2345 divisible by the blocksize 2346 */ 2347 Mbs = M/bs; 2348 extra_rows = bs - M + bs*(Mbs); 2349 if (extra_rows == bs) extra_rows = 0; 2350 else Mbs++; 2351 if (extra_rows &&!rank) { 2352 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2353 } 2354 2355 /* determine ownership of all rows */ 2356 mbs = Mbs/size + ((Mbs % size) > rank); 2357 m = mbs * bs; 2358 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2359 browners = rowners + size + 1; 2360 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2361 rowners[0] = 0; 2362 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2363 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2364 rstart = rowners[rank]; 2365 rend = rowners[rank+1]; 2366 2367 /* distribute row lengths to all processors */ 2368 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2369 if (!rank) { 2370 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2371 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2372 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2373 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2374 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2375 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2376 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2377 } else { 2378 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2379 } 2380 2381 if (!rank) { 2382 /* calculate the number of nonzeros on each processor */ 2383 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2384 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2385 for (i=0; i<size; i++) { 2386 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2387 procsnz[i] += rowlengths[j]; 2388 } 2389 } 2390 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2391 2392 /* determine max buffer needed and allocate it */ 2393 maxnz = 0; 2394 for (i=0; i<size; i++) { 2395 maxnz = PetscMax(maxnz,procsnz[i]); 2396 } 2397 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2398 2399 /* read in my part of the matrix column indices */ 2400 nz = procsnz[0]; 2401 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2402 mycols = ibuf; 2403 if (size == 1) nz -= extra_rows; 2404 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2405 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2406 2407 /* read in every ones (except the last) and ship off */ 2408 for (i=1; i<size-1; i++) { 2409 nz = procsnz[i]; 2410 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2411 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2412 } 2413 /* read in the stuff for the last proc */ 2414 if (size != 1) { 2415 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2416 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2417 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2418 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2419 } 2420 ierr = PetscFree(cols);CHKERRQ(ierr); 2421 } else { 2422 /* determine buffer space needed for message */ 2423 nz = 0; 2424 for (i=0; i<m; i++) { 2425 nz += locrowlens[i]; 2426 } 2427 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2428 mycols = ibuf; 2429 /* receive message of column indices*/ 2430 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2431 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2432 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2433 } 2434 2435 /* loop over local rows, determining number of off diagonal entries */ 2436 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2437 odlens = dlens + (rend-rstart); 2438 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2439 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2440 masked1 = mask + Mbs; 2441 masked2 = masked1 + Mbs; 2442 rowcount = 0; nzcount = 0; 2443 for (i=0; i<mbs; i++) { 2444 dcount = 0; 2445 odcount = 0; 2446 for (j=0; j<bs; j++) { 2447 kmax = locrowlens[rowcount]; 2448 for (k=0; k<kmax; k++) { 2449 tmp = mycols[nzcount++]/bs; 2450 if (!mask[tmp]) { 2451 mask[tmp] = 1; 2452 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2453 else masked1[dcount++] = tmp; 2454 } 2455 } 2456 rowcount++; 2457 } 2458 2459 dlens[i] = dcount; 2460 odlens[i] = odcount; 2461 2462 /* zero out the mask elements we set */ 2463 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2464 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2465 } 2466 2467 /* create our matrix */ 2468 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2469 A = *newmat; 2470 MatSetOption(A,MAT_COLUMNS_SORTED); 2471 2472 if (!rank) { 2473 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2474 /* read in my part of the matrix numerical values */ 2475 nz = procsnz[0]; 2476 vals = buf; 2477 mycols = ibuf; 2478 if (size == 1) nz -= extra_rows; 2479 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2480 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2481 2482 /* insert into matrix */ 2483 jj = rstart*bs; 2484 for (i=0; i<m; i++) { 2485 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2486 mycols += locrowlens[i]; 2487 vals += locrowlens[i]; 2488 jj++; 2489 } 2490 /* read in other processors (except the last one) and ship out */ 2491 for (i=1; i<size-1; i++) { 2492 nz = procsnz[i]; 2493 vals = buf; 2494 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2495 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2496 } 2497 /* the last proc */ 2498 if (size != 1){ 2499 nz = procsnz[i] - extra_rows; 2500 vals = buf; 2501 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2502 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2503 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2504 } 2505 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2506 } else { 2507 /* receive numeric values */ 2508 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2509 2510 /* receive message of values*/ 2511 vals = buf; 2512 mycols = ibuf; 2513 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2514 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2515 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2516 2517 /* insert into matrix */ 2518 jj = rstart*bs; 2519 for (i=0; i<m; i++) { 2520 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2521 mycols += locrowlens[i]; 2522 vals += locrowlens[i]; 2523 jj++; 2524 } 2525 } 2526 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2527 ierr = PetscFree(buf);CHKERRQ(ierr); 2528 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2529 ierr = PetscFree(rowners);CHKERRQ(ierr); 2530 ierr = PetscFree(dlens);CHKERRQ(ierr); 2531 ierr = PetscFree(mask);CHKERRQ(ierr); 2532 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2533 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 #undef __FUNC__ 2538 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2539 /*@ 2540 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2541 2542 Input Parameters: 2543 . mat - the matrix 2544 . fact - factor 2545 2546 Collective on Mat 2547 2548 Level: advanced 2549 2550 Notes: 2551 This can also be set by the command line option: -mat_use_hash_table fact 2552 2553 .keywords: matrix, hashtable, factor, HT 2554 2555 .seealso: MatSetOption() 2556 @*/ 2557 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2558 { 2559 Mat_MPIBAIJ *baij; 2560 2561 PetscFunctionBegin; 2562 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2563 if (mat->type != MATMPIBAIJ) { 2564 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2565 } 2566 baij = (Mat_MPIBAIJ*)mat->data; 2567 baij->ht_fact = fact; 2568 PetscFunctionReturn(0); 2569 } 2570