1 /*$Id: mpibaij.c,v 1.190 2000/04/10 04:25:49 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 extern int MatSetUpMultiply_MPIBAIJ(Mat); 7 extern int DisAssemble_MPIBAIJ(Mat); 8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15 extern int MatPrintHelp_SeqBAIJ(Mat); 16 extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*); 17 18 /* UGLY, ugly, ugly 19 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22 converts the entries into single precision and then calls ..._MatScalar() to put them 23 into the single precision data structures. 24 */ 25 #if defined(PETSC_USE_MAT_SINGLE) 26 extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 27 extern int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 28 extern int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 #else 30 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 31 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 33 #endif 34 35 EXTERN_C_BEGIN 36 #undef __FUNC__ 37 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ" 38 int MatStoreValues_MPIBAIJ(Mat mat) 39 { 40 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 41 int ierr; 42 43 PetscFunctionBegin; 44 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 45 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 46 PetscFunctionReturn(0); 47 } 48 EXTERN_C_END 49 50 EXTERN_C_BEGIN 51 #undef __FUNC__ 52 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ" 53 int MatRetrieveValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 int ierr; 57 58 PetscFunctionBegin; 59 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 60 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 /* 66 Local utility routine that creates a mapping from the global column 67 number to the local number in the off-diagonal part of the local 68 storage of the matrix. This is done in a non scable way since the 69 length of colmap equals the global matrix length. 70 */ 71 #undef __FUNC__ 72 #define __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private" 73 static int CreateColmap_MPIBAIJ_Private(Mat mat) 74 { 75 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 76 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 77 int nbs = B->nbs,i,bs=B->bs,ierr; 78 79 PetscFunctionBegin; 80 #if defined (PETSC_USE_CTABLE) 81 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 82 for (i=0; i<nbs; i++){ 83 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 84 } 85 #else 86 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 87 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 88 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 89 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 90 #endif 91 PetscFunctionReturn(0); 92 } 93 94 #define CHUNKSIZE 10 95 96 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 97 { \ 98 \ 99 brow = row/bs; \ 100 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 101 rmax = aimax[brow]; nrow = ailen[brow]; \ 102 bcol = col/bs; \ 103 ridx = row % bs; cidx = col % bs; \ 104 low = 0; high = nrow; \ 105 while (high-low > 3) { \ 106 t = (low+high)/2; \ 107 if (rp[t] > bcol) high = t; \ 108 else low = t; \ 109 } \ 110 for (_i=low; _i<high; _i++) { \ 111 if (rp[_i] > bcol) break; \ 112 if (rp[_i] == bcol) { \ 113 bap = ap + bs2*_i + bs*cidx + ridx; \ 114 if (addv == ADD_VALUES) *bap += value; \ 115 else *bap = value; \ 116 goto a_noinsert; \ 117 } \ 118 } \ 119 if (a->nonew == 1) goto a_noinsert; \ 120 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 121 if (nrow >= rmax) { \ 122 /* there is no extra room in row, therefore enlarge */ \ 123 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 124 MatScalar *new_a; \ 125 \ 126 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 127 \ 128 /* malloc new storage space */ \ 129 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 130 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 131 new_j = (int*)(new_a + bs2*new_nz); \ 132 new_i = new_j + new_nz; \ 133 \ 134 /* copy over old data into new slots */ \ 135 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 136 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 137 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 138 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 139 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 140 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 142 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 143 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 144 /* free up old matrix storage */ \ 145 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 146 if (!a->singlemalloc) { \ 147 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 148 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 149 } \ 150 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 151 a->singlemalloc = PETSC_TRUE; \ 152 \ 153 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 154 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 155 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 156 a->maxnz += bs2*CHUNKSIZE; \ 157 a->reallocs++; \ 158 a->nz++; \ 159 } \ 160 N = nrow++ - 1; \ 161 /* shift up all the later entries in this row */ \ 162 for (ii=N; ii>=_i; ii--) { \ 163 rp[ii+1] = rp[ii]; \ 164 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 165 } \ 166 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 167 rp[_i] = bcol; \ 168 ap[bs2*_i + bs*cidx + ridx] = value; \ 169 a_noinsert:; \ 170 ailen[brow] = nrow; \ 171 } 172 173 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 174 { \ 175 brow = row/bs; \ 176 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 177 rmax = bimax[brow]; nrow = bilen[brow]; \ 178 bcol = col/bs; \ 179 ridx = row % bs; cidx = col % bs; \ 180 low = 0; high = nrow; \ 181 while (high-low > 3) { \ 182 t = (low+high)/2; \ 183 if (rp[t] > bcol) high = t; \ 184 else low = t; \ 185 } \ 186 for (_i=low; _i<high; _i++) { \ 187 if (rp[_i] > bcol) break; \ 188 if (rp[_i] == bcol) { \ 189 bap = ap + bs2*_i + bs*cidx + ridx; \ 190 if (addv == ADD_VALUES) *bap += value; \ 191 else *bap = value; \ 192 goto b_noinsert; \ 193 } \ 194 } \ 195 if (b->nonew == 1) goto b_noinsert; \ 196 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 197 if (nrow >= rmax) { \ 198 /* there is no extra room in row, therefore enlarge */ \ 199 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 200 MatScalar *new_a; \ 201 \ 202 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 203 \ 204 /* malloc new storage space */ \ 205 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 206 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 207 new_j = (int*)(new_a + bs2*new_nz); \ 208 new_i = new_j + new_nz; \ 209 \ 210 /* copy over old data into new slots */ \ 211 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 212 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 213 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 214 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 215 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 216 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 217 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 218 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 219 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 220 /* free up old matrix storage */ \ 221 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 222 if (!b->singlemalloc) { \ 223 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 224 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 225 } \ 226 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 227 b->singlemalloc = PETSC_TRUE; \ 228 \ 229 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 230 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 231 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 232 b->maxnz += bs2*CHUNKSIZE; \ 233 b->reallocs++; \ 234 b->nz++; \ 235 } \ 236 N = nrow++ - 1; \ 237 /* shift up all the later entries in this row */ \ 238 for (ii=N; ii>=_i; ii--) { \ 239 rp[ii+1] = rp[ii]; \ 240 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 241 } \ 242 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 243 rp[_i] = bcol; \ 244 ap[bs2*_i + bs*cidx + ridx] = value; \ 245 b_noinsert:; \ 246 bilen[brow] = nrow; \ 247 } 248 249 #if defined(PETSC_USE_MAT_SINGLE) 250 #undef __FUNC__ 251 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ" 252 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 253 { 254 int ierr,i,N = m*n; 255 MatScalar *vsingle = (MatScalar*)v; 256 257 PetscFunctionBegin; 258 for (i=0; i<N; i++) { 259 vsingle[i] = v[i]; 260 } 261 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNC__ 266 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ" 267 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 268 { 269 int ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2; 270 MatScalar *vsingle = (MatScalar*)v; 271 272 PetscFunctionBegin; 273 for (i=0; i<N; i++) { 274 vsingle[i] = v[i]; 275 } 276 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 #endif 280 281 #undef __FUNC__ 282 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ" 283 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 284 { 285 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 286 MatScalar value; 287 int ierr,i,j,row,col; 288 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 289 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 290 int cend_orig=baij->cend_bs,bs=baij->bs; 291 292 /* Some Variables required in the macro */ 293 Mat A = baij->A; 294 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 295 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 296 MatScalar *aa=a->a; 297 298 Mat B = baij->B; 299 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 300 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 301 MatScalar *ba=b->a; 302 303 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 304 int low,high,t,ridx,cidx,bs2=a->bs2; 305 MatScalar *ap,*bap; 306 307 PetscFunctionBegin; 308 for (i=0; i<m; i++) { 309 if (im[i] < 0) continue; 310 #if defined(PETSC_USE_BOPT_g) 311 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 312 #endif 313 if (im[i] >= rstart_orig && im[i] < rend_orig) { 314 row = im[i] - rstart_orig; 315 for (j=0; j<n; j++) { 316 if (in[j] >= cstart_orig && in[j] < cend_orig){ 317 col = in[j] - cstart_orig; 318 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 319 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 320 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 321 } else if (in[j] < 0) continue; 322 #if defined(PETSC_USE_BOPT_g) 323 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 324 #endif 325 else { 326 if (mat->was_assembled) { 327 if (!baij->colmap) { 328 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 329 } 330 #if defined (PETSC_USE_CTABLE) 331 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 332 col = col - 1 + in[j]%bs; 333 #else 334 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 335 #endif 336 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 337 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 338 col = in[j]; 339 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 340 B = baij->B; 341 b = (Mat_SeqBAIJ*)(B)->data; 342 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 343 ba=b->a; 344 } 345 } else col = in[j]; 346 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 347 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 348 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 349 } 350 } 351 } else { 352 if (!baij->donotstash) { 353 if (roworiented) { 354 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 355 } else { 356 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 357 } 358 } 359 } 360 } 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNC__ 365 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ" 366 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 367 { 368 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 369 MatScalar *value,*barray=baij->barray; 370 int ierr,i,j,ii,jj,row,col; 371 int roworiented = baij->roworiented,rstart=baij->rstart ; 372 int rend=baij->rend,cstart=baij->cstart,stepval; 373 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 374 375 PetscFunctionBegin; 376 if(!barray) { 377 baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 378 } 379 380 if (roworiented) { 381 stepval = (n-1)*bs; 382 } else { 383 stepval = (m-1)*bs; 384 } 385 for (i=0; i<m; i++) { 386 if (im[i] < 0) continue; 387 #if defined(PETSC_USE_BOPT_g) 388 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 389 #endif 390 if (im[i] >= rstart && im[i] < rend) { 391 row = im[i] - rstart; 392 for (j=0; j<n; j++) { 393 /* If NumCol = 1 then a copy is not required */ 394 if ((roworiented) && (n == 1)) { 395 barray = v + i*bs2; 396 } else if((!roworiented) && (m == 1)) { 397 barray = v + j*bs2; 398 } else { /* Here a copy is required */ 399 if (roworiented) { 400 value = v + i*(stepval+bs)*bs + j*bs; 401 } else { 402 value = v + j*(stepval+bs)*bs + i*bs; 403 } 404 for (ii=0; ii<bs; ii++,value+=stepval) { 405 for (jj=0; jj<bs; jj++) { 406 *barray++ = *value++; 407 } 408 } 409 barray -=bs2; 410 } 411 412 if (in[j] >= cstart && in[j] < cend){ 413 col = in[j] - cstart; 414 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 415 } 416 else if (in[j] < 0) continue; 417 #if defined(PETSC_USE_BOPT_g) 418 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 419 #endif 420 else { 421 if (mat->was_assembled) { 422 if (!baij->colmap) { 423 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 424 } 425 426 #if defined(PETSC_USE_BOPT_g) 427 #if defined (PETSC_USE_CTABLE) 428 { int data; 429 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 430 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 431 } 432 #else 433 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 434 #endif 435 #endif 436 #if defined (PETSC_USE_CTABLE) 437 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 438 col = (col - 1)/bs; 439 #else 440 col = (baij->colmap[in[j]] - 1)/bs; 441 #endif 442 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 443 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 444 col = in[j]; 445 } 446 } 447 else col = in[j]; 448 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 449 } 450 } 451 } else { 452 if (!baij->donotstash) { 453 if (roworiented) { 454 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 455 } else { 456 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 457 } 458 } 459 } 460 } 461 PetscFunctionReturn(0); 462 } 463 #define HASH_KEY 0.6180339887 464 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 465 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 466 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 467 #undef __FUNC__ 468 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT" 469 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 470 { 471 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 472 int ierr,i,j,row,col; 473 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 474 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 475 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 476 PetscReal tmp; 477 MatScalar ** HD = baij->hd,value; 478 #if defined(PETSC_USE_BOPT_g) 479 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 480 #endif 481 482 PetscFunctionBegin; 483 484 for (i=0; i<m; i++) { 485 #if defined(PETSC_USE_BOPT_g) 486 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 487 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 488 #endif 489 row = im[i]; 490 if (row >= rstart_orig && row < rend_orig) { 491 for (j=0; j<n; j++) { 492 col = in[j]; 493 if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m]; 494 /* Look up into the Hash Table */ 495 key = (row/bs)*Nbs+(col/bs)+1; 496 h1 = HASH(size,key,tmp); 497 498 499 idx = h1; 500 #if defined(PETSC_USE_BOPT_g) 501 insert_ct++; 502 total_ct++; 503 if (HT[idx] != key) { 504 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 505 if (idx == size) { 506 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 507 if (idx == h1) { 508 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 509 } 510 } 511 } 512 #else 513 if (HT[idx] != key) { 514 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 515 if (idx == size) { 516 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 517 if (idx == h1) { 518 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 519 } 520 } 521 } 522 #endif 523 /* A HASH table entry is found, so insert the values at the correct address */ 524 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 525 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 526 } 527 } else { 528 if (!baij->donotstash) { 529 if (roworiented) { 530 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 531 } else { 532 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 533 } 534 } 535 } 536 } 537 #if defined(PETSC_USE_BOPT_g) 538 baij->ht_total_ct = total_ct; 539 baij->ht_insert_ct = insert_ct; 540 #endif 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNC__ 545 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 546 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 547 { 548 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 549 int ierr,i,j,ii,jj,row,col; 550 int roworiented = baij->roworiented,rstart=baij->rstart ; 551 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 552 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 553 PetscReal tmp; 554 MatScalar **HD = baij->hd,*baij_a; 555 Scalar *v_t,*value; 556 #if defined(PETSC_USE_BOPT_g) 557 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 558 #endif 559 560 PetscFunctionBegin; 561 562 if (roworiented) { 563 stepval = (n-1)*bs; 564 } else { 565 stepval = (m-1)*bs; 566 } 567 for (i=0; i<m; i++) { 568 #if defined(PETSC_USE_BOPT_g) 569 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 570 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 571 #endif 572 row = im[i]; 573 v_t = v + i*bs2; 574 if (row >= rstart && row < rend) { 575 for (j=0; j<n; j++) { 576 col = in[j]; 577 578 /* Look up into the Hash Table */ 579 key = row*Nbs+col+1; 580 h1 = HASH(size,key,tmp); 581 582 idx = h1; 583 #if defined(PETSC_USE_BOPT_g) 584 total_ct++; 585 insert_ct++; 586 if (HT[idx] != key) { 587 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 588 if (idx == size) { 589 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 590 if (idx == h1) { 591 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 592 } 593 } 594 } 595 #else 596 if (HT[idx] != key) { 597 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 598 if (idx == size) { 599 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 600 if (idx == h1) { 601 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 602 } 603 } 604 } 605 #endif 606 baij_a = HD[idx]; 607 if (roworiented) { 608 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 609 /* value = v + (i*(stepval+bs)+j)*bs; */ 610 value = v_t; 611 v_t += bs; 612 if (addv == ADD_VALUES) { 613 for (ii=0; ii<bs; ii++,value+=stepval) { 614 for (jj=ii; jj<bs2; jj+=bs) { 615 baij_a[jj] += *value++; 616 } 617 } 618 } else { 619 for (ii=0; ii<bs; ii++,value+=stepval) { 620 for (jj=ii; jj<bs2; jj+=bs) { 621 baij_a[jj] = *value++; 622 } 623 } 624 } 625 } else { 626 value = v + j*(stepval+bs)*bs + i*bs; 627 if (addv == ADD_VALUES) { 628 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 629 for (jj=0; jj<bs; jj++) { 630 baij_a[jj] += *value++; 631 } 632 } 633 } else { 634 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 635 for (jj=0; jj<bs; jj++) { 636 baij_a[jj] = *value++; 637 } 638 } 639 } 640 } 641 } 642 } else { 643 if (!baij->donotstash) { 644 if (roworiented) { 645 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 646 } else { 647 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 648 } 649 } 650 } 651 } 652 #if defined(PETSC_USE_BOPT_g) 653 baij->ht_total_ct = total_ct; 654 baij->ht_insert_ct = insert_ct; 655 #endif 656 PetscFunctionReturn(0); 657 } 658 659 #undef __FUNC__ 660 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ" 661 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 662 { 663 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 664 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 665 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 666 667 PetscFunctionBegin; 668 for (i=0; i<m; i++) { 669 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 670 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 671 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 672 row = idxm[i] - bsrstart; 673 for (j=0; j<n; j++) { 674 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 675 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 676 if (idxn[j] >= bscstart && idxn[j] < bscend){ 677 col = idxn[j] - bscstart; 678 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 679 } else { 680 if (!baij->colmap) { 681 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 682 } 683 #if defined (PETSC_USE_CTABLE) 684 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 685 data --; 686 #else 687 data = baij->colmap[idxn[j]/bs]-1; 688 #endif 689 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 690 else { 691 col = data + idxn[j]%bs; 692 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 693 } 694 } 695 } 696 } else { 697 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 698 } 699 } 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNC__ 704 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ" 705 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 706 { 707 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 708 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 709 int ierr,i,bs2=baij->bs2; 710 PetscReal sum = 0.0; 711 MatScalar *v; 712 713 PetscFunctionBegin; 714 if (baij->size == 1) { 715 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 716 } else { 717 if (type == NORM_FROBENIUS) { 718 v = amat->a; 719 for (i=0; i<amat->nz*bs2; i++) { 720 #if defined(PETSC_USE_COMPLEX) 721 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 722 #else 723 sum += (*v)*(*v); v++; 724 #endif 725 } 726 v = bmat->a; 727 for (i=0; i<bmat->nz*bs2; i++) { 728 #if defined(PETSC_USE_COMPLEX) 729 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 730 #else 731 sum += (*v)*(*v); v++; 732 #endif 733 } 734 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 735 *norm = sqrt(*norm); 736 } else { 737 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 738 } 739 } 740 PetscFunctionReturn(0); 741 } 742 743 744 /* 745 Creates the hash table, and sets the table 746 This table is created only once. 747 If new entried need to be added to the matrix 748 then the hash table has to be destroyed and 749 recreated. 750 */ 751 #undef __FUNC__ 752 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 753 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 754 { 755 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 756 Mat A = baij->A,B=baij->B; 757 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 758 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 759 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 760 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 761 int *HT,key; 762 MatScalar **HD; 763 PetscReal tmp; 764 #if defined(PETSC_USE_BOPT_g) 765 int ct=0,max=0; 766 #endif 767 768 PetscFunctionBegin; 769 baij->ht_size=(int)(factor*nz); 770 size = baij->ht_size; 771 772 if (baij->ht) { 773 PetscFunctionReturn(0); 774 } 775 776 /* Allocate Memory for Hash Table */ 777 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 778 baij->ht = (int*)(baij->hd + size); 779 HD = baij->hd; 780 HT = baij->ht; 781 782 783 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 784 785 786 /* Loop Over A */ 787 for (i=0; i<a->mbs; i++) { 788 for (j=ai[i]; j<ai[i+1]; j++) { 789 row = i+rstart; 790 col = aj[j]+cstart; 791 792 key = row*Nbs + col + 1; 793 h1 = HASH(size,key,tmp); 794 for (k=0; k<size; k++){ 795 if (HT[(h1+k)%size] == 0.0) { 796 HT[(h1+k)%size] = key; 797 HD[(h1+k)%size] = a->a + j*bs2; 798 break; 799 #if defined(PETSC_USE_BOPT_g) 800 } else { 801 ct++; 802 #endif 803 } 804 } 805 #if defined(PETSC_USE_BOPT_g) 806 if (k> max) max = k; 807 #endif 808 } 809 } 810 /* Loop Over B */ 811 for (i=0; i<b->mbs; i++) { 812 for (j=bi[i]; j<bi[i+1]; j++) { 813 row = i+rstart; 814 col = garray[bj[j]]; 815 key = row*Nbs + col + 1; 816 h1 = HASH(size,key,tmp); 817 for (k=0; k<size; k++){ 818 if (HT[(h1+k)%size] == 0.0) { 819 HT[(h1+k)%size] = key; 820 HD[(h1+k)%size] = b->a + j*bs2; 821 break; 822 #if defined(PETSC_USE_BOPT_g) 823 } else { 824 ct++; 825 #endif 826 } 827 } 828 #if defined(PETSC_USE_BOPT_g) 829 if (k> max) max = k; 830 #endif 831 } 832 } 833 834 /* Print Summary */ 835 #if defined(PETSC_USE_BOPT_g) 836 for (i=0,j=0; i<size; i++) { 837 if (HT[i]) {j++;} 838 } 839 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 840 #endif 841 PetscFunctionReturn(0); 842 } 843 844 #undef __FUNC__ 845 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 846 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 847 { 848 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 849 int ierr,nstash,reallocs; 850 InsertMode addv; 851 852 PetscFunctionBegin; 853 if (baij->donotstash) { 854 PetscFunctionReturn(0); 855 } 856 857 /* make sure all processors are either in INSERTMODE or ADDMODE */ 858 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 859 if (addv == (ADD_VALUES|INSERT_VALUES)) { 860 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 861 } 862 mat->insertmode = addv; /* in case this processor had no cache */ 863 864 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 865 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 866 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 867 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 868 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 869 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNC__ 874 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 875 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 876 { 877 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 878 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 879 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 880 int *row,*col,other_disassembled; 881 PetscTruth r1,r2,r3; 882 MatScalar *val; 883 InsertMode addv = mat->insertmode; 884 885 PetscFunctionBegin; 886 if (!baij->donotstash) { 887 while (1) { 888 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 889 if (!flg) break; 890 891 for (i=0; i<n;) { 892 /* Now identify the consecutive vals belonging to the same row */ 893 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 894 if (j < n) ncols = j-i; 895 else ncols = n-i; 896 /* Now assemble all these values with a single function call */ 897 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 898 i = j; 899 } 900 } 901 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 902 /* Now process the block-stash. Since the values are stashed column-oriented, 903 set the roworiented flag to column oriented, and after MatSetValues() 904 restore the original flags */ 905 r1 = baij->roworiented; 906 r2 = a->roworiented; 907 r3 = b->roworiented; 908 baij->roworiented = PETSC_FALSE; 909 a->roworiented = PETSC_FALSE; 910 b->roworiented = PETSC_FALSE; 911 while (1) { 912 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 913 if (!flg) break; 914 915 for (i=0; i<n;) { 916 /* Now identify the consecutive vals belonging to the same row */ 917 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 918 if (j < n) ncols = j-i; 919 else ncols = n-i; 920 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 921 i = j; 922 } 923 } 924 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 925 baij->roworiented = r1; 926 a->roworiented = r2; 927 b->roworiented = r3; 928 } 929 930 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 931 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 932 933 /* determine if any processor has disassembled, if so we must 934 also disassemble ourselfs, in order that we may reassemble. */ 935 /* 936 if nonzero structure of submatrix B cannot change then we know that 937 no processor disassembled thus we can skip this stuff 938 */ 939 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 940 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 941 if (mat->was_assembled && !other_disassembled) { 942 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 943 } 944 } 945 946 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 947 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 948 } 949 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 950 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 951 952 #if defined(PETSC_USE_BOPT_g) 953 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 954 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 955 baij->ht_total_ct = 0; 956 baij->ht_insert_ct = 0; 957 } 958 #endif 959 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 960 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 961 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 962 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 963 } 964 965 if (baij->rowvalues) { 966 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 967 baij->rowvalues = 0; 968 } 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNC__ 973 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket" 974 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 975 { 976 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 977 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 978 PetscTruth isascii,isdraw; 979 980 PetscFunctionBegin; 981 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 982 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 983 if (isascii) { 984 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 985 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 986 MatInfo info; 987 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 988 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 989 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 990 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 991 baij->bs,(int)info.memory);CHKERRQ(ierr); 992 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 993 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 994 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 995 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 996 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 997 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1000 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 } 1004 1005 if (isdraw) { 1006 Draw draw; 1007 PetscTruth isnull; 1008 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1009 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1010 } 1011 1012 if (size == 1) { 1013 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1014 } else { 1015 /* assemble the entire matrix onto first processor. */ 1016 Mat A; 1017 Mat_SeqBAIJ *Aloc; 1018 int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1019 MatScalar *a; 1020 1021 if (!rank) { 1022 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1023 } else { 1024 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1025 } 1026 PLogObjectParent(mat,A); 1027 1028 /* copy over the A part */ 1029 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1030 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1031 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1032 1033 for (i=0; i<mbs; i++) { 1034 rvals[0] = bs*(baij->rstart + i); 1035 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1036 for (j=ai[i]; j<ai[i+1]; j++) { 1037 col = (baij->cstart+aj[j])*bs; 1038 for (k=0; k<bs; k++) { 1039 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1040 col++; a += bs; 1041 } 1042 } 1043 } 1044 /* copy over the B part */ 1045 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1046 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1047 for (i=0; i<mbs; i++) { 1048 rvals[0] = bs*(baij->rstart + i); 1049 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1050 for (j=ai[i]; j<ai[i+1]; j++) { 1051 col = baij->garray[aj[j]]*bs; 1052 for (k=0; k<bs; k++) { 1053 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1054 col++; a += bs; 1055 } 1056 } 1057 } 1058 ierr = PetscFree(rvals);CHKERRQ(ierr); 1059 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1060 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1061 /* 1062 Everyone has to call to draw the matrix since the graphics waits are 1063 synchronized across all processors that share the Draw object 1064 */ 1065 if (!rank) { 1066 Viewer sviewer; 1067 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1068 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1069 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1070 } 1071 ierr = MatDestroy(A);CHKERRQ(ierr); 1072 } 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNC__ 1077 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ" 1078 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1079 { 1080 int ierr; 1081 PetscTruth isascii,isdraw,issocket,isbinary; 1082 1083 PetscFunctionBegin; 1084 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1085 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1086 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1087 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1088 if (isascii || isdraw || issocket || isbinary) { 1089 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1090 } else { 1091 SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNC__ 1097 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ" 1098 int MatDestroy_MPIBAIJ(Mat mat) 1099 { 1100 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1101 int ierr; 1102 1103 PetscFunctionBegin; 1104 1105 if (mat->mapping) { 1106 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1107 } 1108 if (mat->bmapping) { 1109 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1110 } 1111 if (mat->rmap) { 1112 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1113 } 1114 if (mat->cmap) { 1115 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1116 } 1117 #if defined(PETSC_USE_LOG) 1118 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 1119 #endif 1120 1121 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1122 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1123 1124 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1125 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1126 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1127 #if defined (PETSC_USE_CTABLE) 1128 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1129 #else 1130 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1131 #endif 1132 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1133 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1134 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1135 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1136 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1137 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1138 ierr = PetscFree(baij);CHKERRQ(ierr); 1139 PLogObjectDestroy(mat); 1140 PetscHeaderDestroy(mat); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNC__ 1145 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1146 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1149 int ierr,nt; 1150 1151 PetscFunctionBegin; 1152 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1153 if (nt != a->n) { 1154 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1155 } 1156 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1157 if (nt != a->m) { 1158 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1159 } 1160 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1161 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1162 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1163 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1164 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNC__ 1169 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1170 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1171 { 1172 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1173 int ierr; 1174 1175 PetscFunctionBegin; 1176 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1177 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1178 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1179 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNC__ 1184 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ" 1185 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1186 { 1187 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1188 int ierr; 1189 1190 PetscFunctionBegin; 1191 /* do nondiagonal part */ 1192 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1193 /* send it on its way */ 1194 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1195 /* do local part */ 1196 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1197 /* receive remote parts: note this assumes the values are not actually */ 1198 /* inserted in yy until the next line, which is true for my implementation*/ 1199 /* but is not perhaps always true. */ 1200 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNC__ 1205 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ" 1206 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1207 { 1208 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1209 int ierr; 1210 1211 PetscFunctionBegin; 1212 /* do nondiagonal part */ 1213 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1214 /* send it on its way */ 1215 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1216 /* do local part */ 1217 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1218 /* receive remote parts: note this assumes the values are not actually */ 1219 /* inserted in yy until the next line, which is true for my implementation*/ 1220 /* but is not perhaps always true. */ 1221 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1222 PetscFunctionReturn(0); 1223 } 1224 1225 /* 1226 This only works correctly for square matrices where the subblock A->A is the 1227 diagonal block 1228 */ 1229 #undef __FUNC__ 1230 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ" 1231 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1232 { 1233 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1234 int ierr; 1235 1236 PetscFunctionBegin; 1237 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1238 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNC__ 1243 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ" 1244 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1245 { 1246 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1247 int ierr; 1248 1249 PetscFunctionBegin; 1250 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1251 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNC__ 1256 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ" 1257 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1258 { 1259 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1260 1261 PetscFunctionBegin; 1262 if (m) *m = mat->M; 1263 if (n) *n = mat->N; 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNC__ 1268 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ" 1269 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1270 { 1271 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1272 1273 PetscFunctionBegin; 1274 *m = mat->m; *n = mat->n; 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNC__ 1279 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ" 1280 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1281 { 1282 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1283 1284 PetscFunctionBegin; 1285 if (m) *m = mat->rstart*mat->bs; 1286 if (n) *n = mat->rend*mat->bs; 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNC__ 1291 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ" 1292 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1293 { 1294 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1295 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1296 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1297 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1298 int *cmap,*idx_p,cstart = mat->cstart; 1299 1300 PetscFunctionBegin; 1301 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1302 mat->getrowactive = PETSC_TRUE; 1303 1304 if (!mat->rowvalues && (idx || v)) { 1305 /* 1306 allocate enough space to hold information from the longest row. 1307 */ 1308 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1309 int max = 1,mbs = mat->mbs,tmp; 1310 for (i=0; i<mbs; i++) { 1311 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1312 if (max < tmp) { max = tmp; } 1313 } 1314 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1315 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1316 } 1317 1318 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1319 lrow = row - brstart; 1320 1321 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1322 if (!v) {pvA = 0; pvB = 0;} 1323 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1324 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1325 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1326 nztot = nzA + nzB; 1327 1328 cmap = mat->garray; 1329 if (v || idx) { 1330 if (nztot) { 1331 /* Sort by increasing column numbers, assuming A and B already sorted */ 1332 int imark = -1; 1333 if (v) { 1334 *v = v_p = mat->rowvalues; 1335 for (i=0; i<nzB; i++) { 1336 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1337 else break; 1338 } 1339 imark = i; 1340 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1341 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1342 } 1343 if (idx) { 1344 *idx = idx_p = mat->rowindices; 1345 if (imark > -1) { 1346 for (i=0; i<imark; i++) { 1347 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1348 } 1349 } else { 1350 for (i=0; i<nzB; i++) { 1351 if (cmap[cworkB[i]/bs] < cstart) 1352 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1353 else break; 1354 } 1355 imark = i; 1356 } 1357 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1358 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1359 } 1360 } else { 1361 if (idx) *idx = 0; 1362 if (v) *v = 0; 1363 } 1364 } 1365 *nz = nztot; 1366 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1367 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNC__ 1372 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ" 1373 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1374 { 1375 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1376 1377 PetscFunctionBegin; 1378 if (baij->getrowactive == PETSC_FALSE) { 1379 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1380 } 1381 baij->getrowactive = PETSC_FALSE; 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNC__ 1386 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ" 1387 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1388 { 1389 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1390 1391 PetscFunctionBegin; 1392 *bs = baij->bs; 1393 PetscFunctionReturn(0); 1394 } 1395 1396 #undef __FUNC__ 1397 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ" 1398 int MatZeroEntries_MPIBAIJ(Mat A) 1399 { 1400 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1401 int ierr; 1402 1403 PetscFunctionBegin; 1404 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1405 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNC__ 1410 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ" 1411 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1412 { 1413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1414 Mat A = a->A,B = a->B; 1415 int ierr; 1416 PetscReal isend[5],irecv[5]; 1417 1418 PetscFunctionBegin; 1419 info->block_size = (double)a->bs; 1420 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1421 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1422 isend[3] = info->memory; isend[4] = info->mallocs; 1423 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1424 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1425 isend[3] += info->memory; isend[4] += info->mallocs; 1426 if (flag == MAT_LOCAL) { 1427 info->nz_used = isend[0]; 1428 info->nz_allocated = isend[1]; 1429 info->nz_unneeded = isend[2]; 1430 info->memory = isend[3]; 1431 info->mallocs = isend[4]; 1432 } else if (flag == MAT_GLOBAL_MAX) { 1433 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1434 info->nz_used = irecv[0]; 1435 info->nz_allocated = irecv[1]; 1436 info->nz_unneeded = irecv[2]; 1437 info->memory = irecv[3]; 1438 info->mallocs = irecv[4]; 1439 } else if (flag == MAT_GLOBAL_SUM) { 1440 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1441 info->nz_used = irecv[0]; 1442 info->nz_allocated = irecv[1]; 1443 info->nz_unneeded = irecv[2]; 1444 info->memory = irecv[3]; 1445 info->mallocs = irecv[4]; 1446 } else { 1447 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1448 } 1449 info->rows_global = (double)a->M; 1450 info->columns_global = (double)a->N; 1451 info->rows_local = (double)a->m; 1452 info->columns_local = (double)a->N; 1453 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1454 info->fill_ratio_needed = 0; 1455 info->factor_mallocs = 0; 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNC__ 1460 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1461 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1462 { 1463 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1464 int ierr; 1465 1466 PetscFunctionBegin; 1467 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1468 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1469 op == MAT_COLUMNS_UNSORTED || 1470 op == MAT_COLUMNS_SORTED || 1471 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1472 op == MAT_KEEP_ZEROED_ROWS || 1473 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1474 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1475 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1476 } else if (op == MAT_ROW_ORIENTED) { 1477 a->roworiented = PETSC_TRUE; 1478 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1479 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1480 } else if (op == MAT_ROWS_SORTED || 1481 op == MAT_ROWS_UNSORTED || 1482 op == MAT_SYMMETRIC || 1483 op == MAT_STRUCTURALLY_SYMMETRIC || 1484 op == MAT_YES_NEW_DIAGONALS || 1485 op == MAT_USE_HASH_TABLE) { 1486 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1487 } else if (op == MAT_COLUMN_ORIENTED) { 1488 a->roworiented = PETSC_FALSE; 1489 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1490 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1491 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1492 a->donotstash = PETSC_TRUE; 1493 } else if (op == MAT_NO_NEW_DIAGONALS) { 1494 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1495 } else if (op == MAT_USE_HASH_TABLE) { 1496 a->ht_flag = PETSC_TRUE; 1497 } else { 1498 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNC__ 1504 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1505 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1506 { 1507 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1508 Mat_SeqBAIJ *Aloc; 1509 Mat B; 1510 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1511 int bs=baij->bs,mbs=baij->mbs; 1512 MatScalar *a; 1513 1514 PetscFunctionBegin; 1515 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1516 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1517 1518 /* copy over the A part */ 1519 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1520 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1521 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1522 1523 for (i=0; i<mbs; i++) { 1524 rvals[0] = bs*(baij->rstart + i); 1525 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1526 for (j=ai[i]; j<ai[i+1]; j++) { 1527 col = (baij->cstart+aj[j])*bs; 1528 for (k=0; k<bs; k++) { 1529 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1530 col++; a += bs; 1531 } 1532 } 1533 } 1534 /* copy over the B part */ 1535 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1536 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1537 for (i=0; i<mbs; i++) { 1538 rvals[0] = bs*(baij->rstart + i); 1539 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1540 for (j=ai[i]; j<ai[i+1]; j++) { 1541 col = baij->garray[aj[j]]*bs; 1542 for (k=0; k<bs; k++) { 1543 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1544 col++; a += bs; 1545 } 1546 } 1547 } 1548 ierr = PetscFree(rvals);CHKERRQ(ierr); 1549 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1550 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1551 1552 if (matout) { 1553 *matout = B; 1554 } else { 1555 PetscOps *Abops; 1556 MatOps Aops; 1557 1558 /* This isn't really an in-place transpose .... but free data structures from baij */ 1559 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1560 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1561 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1562 #if defined (PETSC_USE_CTABLE) 1563 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1564 #else 1565 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1566 #endif 1567 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1568 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1569 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1570 ierr = PetscFree(baij);CHKERRQ(ierr); 1571 1572 /* 1573 This is horrible, horrible code. We need to keep the 1574 A pointers for the bops and ops but copy everything 1575 else from C. 1576 */ 1577 Abops = A->bops; 1578 Aops = A->ops; 1579 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1580 A->bops = Abops; 1581 A->ops = Aops; 1582 1583 PetscHeaderDestroy(B); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNC__ 1589 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ" 1590 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1591 { 1592 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1593 Mat a = baij->A,b = baij->B; 1594 int ierr,s1,s2,s3; 1595 1596 PetscFunctionBegin; 1597 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1598 if (rr) { 1599 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1600 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1601 /* Overlap communication with computation. */ 1602 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1603 } 1604 if (ll) { 1605 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1606 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1607 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1608 } 1609 /* scale the diagonal block */ 1610 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1611 1612 if (rr) { 1613 /* Do a scatter end and then right scale the off-diagonal block */ 1614 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1615 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1616 } 1617 1618 PetscFunctionReturn(0); 1619 } 1620 1621 #undef __FUNC__ 1622 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ" 1623 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1624 { 1625 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1626 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1627 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1628 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1629 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1630 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1631 MPI_Comm comm = A->comm; 1632 MPI_Request *send_waits,*recv_waits; 1633 MPI_Status recv_status,*send_status; 1634 IS istmp; 1635 1636 PetscFunctionBegin; 1637 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1638 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1639 1640 /* first count number of contributors to each processor */ 1641 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1642 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1643 procs = nprocs + size; 1644 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1645 for (i=0; i<N; i++) { 1646 idx = rows[i]; 1647 found = 0; 1648 for (j=0; j<size; j++) { 1649 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1650 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1651 } 1652 } 1653 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1654 } 1655 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1656 1657 /* inform other processors of number of messages and max length*/ 1658 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1659 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1660 nmax = work[rank]; 1661 nrecvs = work[size+rank]; 1662 ierr = PetscFree(work);CHKERRQ(ierr); 1663 1664 /* post receives: */ 1665 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1666 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1667 for (i=0; i<nrecvs; i++) { 1668 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1669 } 1670 1671 /* do sends: 1672 1) starts[i] gives the starting index in svalues for stuff going to 1673 the ith processor 1674 */ 1675 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1676 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1677 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1678 starts[0] = 0; 1679 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1680 for (i=0; i<N; i++) { 1681 svalues[starts[owner[i]]++] = rows[i]; 1682 } 1683 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1684 1685 starts[0] = 0; 1686 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1687 count = 0; 1688 for (i=0; i<size; i++) { 1689 if (procs[i]) { 1690 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1691 } 1692 } 1693 ierr = PetscFree(starts);CHKERRQ(ierr); 1694 1695 base = owners[rank]*bs; 1696 1697 /* wait on receives */ 1698 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1699 source = lens + nrecvs; 1700 count = nrecvs; slen = 0; 1701 while (count) { 1702 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1703 /* unpack receives into our local space */ 1704 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1705 source[imdex] = recv_status.MPI_SOURCE; 1706 lens[imdex] = n; 1707 slen += n; 1708 count--; 1709 } 1710 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1711 1712 /* move the data into the send scatter */ 1713 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1714 count = 0; 1715 for (i=0; i<nrecvs; i++) { 1716 values = rvalues + i*nmax; 1717 for (j=0; j<lens[i]; j++) { 1718 lrows[count++] = values[j] - base; 1719 } 1720 } 1721 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1722 ierr = PetscFree(lens);CHKERRQ(ierr); 1723 ierr = PetscFree(owner);CHKERRQ(ierr); 1724 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1725 1726 /* actually zap the local rows */ 1727 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1728 PLogObjectParent(A,istmp); 1729 1730 /* 1731 Zero the required rows. If the "diagonal block" of the matrix 1732 is square and the user wishes to set the diagonal we use seperate 1733 code so that MatSetValues() is not called for each diagonal allocating 1734 new memory, thus calling lots of mallocs and slowing things down. 1735 1736 Contributed by: Mathew Knepley 1737 */ 1738 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1739 ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr); 1740 if (diag && (l->A->M == l->A->N)) { 1741 ierr = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1742 } else if (diag) { 1743 ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1744 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1745 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1746 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1747 } 1748 for (i = 0; i < slen; i++) { 1749 row = lrows[i] + rstart_bs; 1750 ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1751 } 1752 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1753 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1754 } else { 1755 ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1756 } 1757 1758 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1759 ierr = PetscFree(lrows);CHKERRQ(ierr); 1760 1761 /* wait on sends */ 1762 if (nsends) { 1763 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1764 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1765 ierr = PetscFree(send_status);CHKERRQ(ierr); 1766 } 1767 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1768 ierr = PetscFree(svalues);CHKERRQ(ierr); 1769 1770 PetscFunctionReturn(0); 1771 } 1772 1773 #undef __FUNC__ 1774 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ" 1775 int MatPrintHelp_MPIBAIJ(Mat A) 1776 { 1777 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1778 MPI_Comm comm = A->comm; 1779 static int called = 0; 1780 int ierr; 1781 1782 PetscFunctionBegin; 1783 if (!a->rank) { 1784 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1785 } 1786 if (called) {PetscFunctionReturn(0);} else called = 1; 1787 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1788 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 1792 #undef __FUNC__ 1793 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1794 int MatSetUnfactored_MPIBAIJ(Mat A) 1795 { 1796 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1797 int ierr; 1798 1799 PetscFunctionBegin; 1800 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1801 PetscFunctionReturn(0); 1802 } 1803 1804 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1805 1806 #undef __FUNC__ 1807 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 1808 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1809 { 1810 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1811 Mat a,b,c,d; 1812 PetscTruth flg; 1813 int ierr; 1814 1815 PetscFunctionBegin; 1816 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1817 a = matA->A; b = matA->B; 1818 c = matB->A; d = matB->B; 1819 1820 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1821 if (flg == PETSC_TRUE) { 1822 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1823 } 1824 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 /* -------------------------------------------------------------------*/ 1829 static struct _MatOps MatOps_Values = { 1830 MatSetValues_MPIBAIJ, 1831 MatGetRow_MPIBAIJ, 1832 MatRestoreRow_MPIBAIJ, 1833 MatMult_MPIBAIJ, 1834 MatMultAdd_MPIBAIJ, 1835 MatMultTranspose_MPIBAIJ, 1836 MatMultTransposeAdd_MPIBAIJ, 1837 0, 1838 0, 1839 0, 1840 0, 1841 0, 1842 0, 1843 0, 1844 MatTranspose_MPIBAIJ, 1845 MatGetInfo_MPIBAIJ, 1846 MatEqual_MPIBAIJ, 1847 MatGetDiagonal_MPIBAIJ, 1848 MatDiagonalScale_MPIBAIJ, 1849 MatNorm_MPIBAIJ, 1850 MatAssemblyBegin_MPIBAIJ, 1851 MatAssemblyEnd_MPIBAIJ, 1852 0, 1853 MatSetOption_MPIBAIJ, 1854 MatZeroEntries_MPIBAIJ, 1855 MatZeroRows_MPIBAIJ, 1856 0, 1857 0, 1858 0, 1859 0, 1860 MatGetSize_MPIBAIJ, 1861 MatGetLocalSize_MPIBAIJ, 1862 MatGetOwnershipRange_MPIBAIJ, 1863 0, 1864 0, 1865 0, 1866 0, 1867 MatDuplicate_MPIBAIJ, 1868 0, 1869 0, 1870 0, 1871 0, 1872 0, 1873 MatGetSubMatrices_MPIBAIJ, 1874 MatIncreaseOverlap_MPIBAIJ, 1875 MatGetValues_MPIBAIJ, 1876 0, 1877 MatPrintHelp_MPIBAIJ, 1878 MatScale_MPIBAIJ, 1879 0, 1880 0, 1881 0, 1882 MatGetBlockSize_MPIBAIJ, 1883 0, 1884 0, 1885 0, 1886 0, 1887 0, 1888 0, 1889 MatSetUnfactored_MPIBAIJ, 1890 0, 1891 MatSetValuesBlocked_MPIBAIJ, 1892 0, 1893 0, 1894 0, 1895 MatGetMaps_Petsc}; 1896 1897 1898 EXTERN_C_BEGIN 1899 #undef __FUNC__ 1900 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 1901 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1902 { 1903 PetscFunctionBegin; 1904 *a = ((Mat_MPIBAIJ *)A->data)->A; 1905 *iscopy = PETSC_FALSE; 1906 PetscFunctionReturn(0); 1907 } 1908 EXTERN_C_END 1909 1910 #undef __FUNC__ 1911 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 1912 /*@C 1913 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1914 (block compressed row). For good matrix assembly performance 1915 the user should preallocate the matrix storage by setting the parameters 1916 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1917 performance can be increased by more than a factor of 50. 1918 1919 Collective on MPI_Comm 1920 1921 Input Parameters: 1922 + comm - MPI communicator 1923 . bs - size of blockk 1924 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1925 This value should be the same as the local size used in creating the 1926 y vector for the matrix-vector product y = Ax. 1927 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1928 This value should be the same as the local size used in creating the 1929 x vector for the matrix-vector product y = Ax. 1930 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1931 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1932 . d_nz - number of block nonzeros per block row in diagonal portion of local 1933 submatrix (same for all local rows) 1934 . d_nnz - array containing the number of block nonzeros in the various block rows 1935 of the in diagonal portion of the local (possibly different for each block 1936 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1937 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1938 submatrix (same for all local rows). 1939 - o_nnz - array containing the number of nonzeros in the various block rows of the 1940 off-diagonal portion of the local submatrix (possibly different for 1941 each block row) or PETSC_NULL. 1942 1943 Output Parameter: 1944 . A - the matrix 1945 1946 Options Database Keys: 1947 . -mat_no_unroll - uses code that does not unroll the loops in the 1948 block calculations (much slower) 1949 . -mat_block_size - size of the blocks to use 1950 . -mat_mpi - use the parallel matrix data structures even on one processor 1951 (defaults to using SeqBAIJ format on one processor) 1952 1953 Notes: 1954 The user MUST specify either the local or global matrix dimensions 1955 (possibly both). 1956 1957 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1958 than it must be used on all processors that share the object for that argument. 1959 1960 Storage Information: 1961 For a square global matrix we define each processor's diagonal portion 1962 to be its local rows and the corresponding columns (a square submatrix); 1963 each processor's off-diagonal portion encompasses the remainder of the 1964 local matrix (a rectangular submatrix). 1965 1966 The user can specify preallocated storage for the diagonal part of 1967 the local submatrix with either d_nz or d_nnz (not both). Set 1968 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1969 memory allocation. Likewise, specify preallocated storage for the 1970 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1971 1972 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1973 the figure below we depict these three local rows and all columns (0-11). 1974 1975 .vb 1976 0 1 2 3 4 5 6 7 8 9 10 11 1977 ------------------- 1978 row 3 | o o o d d d o o o o o o 1979 row 4 | o o o d d d o o o o o o 1980 row 5 | o o o d d d o o o o o o 1981 ------------------- 1982 .ve 1983 1984 Thus, any entries in the d locations are stored in the d (diagonal) 1985 submatrix, and any entries in the o locations are stored in the 1986 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1987 stored simply in the MATSEQBAIJ format for compressed row storage. 1988 1989 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1990 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1991 In general, for PDE problems in which most nonzeros are near the diagonal, 1992 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1993 or you will get TERRIBLE performance; see the users' manual chapter on 1994 matrices. 1995 1996 Level: intermediate 1997 1998 .keywords: matrix, block, aij, compressed row, sparse, parallel 1999 2000 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2001 @*/ 2002 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2003 { 2004 Mat B; 2005 Mat_MPIBAIJ *b; 2006 int ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 2007 PetscTruth flag1,flag2,flg; 2008 2009 PetscFunctionBegin; 2010 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2011 2012 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2013 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 2014 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 2015 if (d_nnz) { 2016 for (i=0; i<m/bs; i++) { 2017 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2018 } 2019 } 2020 if (o_nnz) { 2021 for (i=0; i<m/bs; i++) { 2022 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2023 } 2024 } 2025 2026 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2027 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2028 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2029 if (!flag1 && !flag2 && size == 1) { 2030 if (M == PETSC_DECIDE) M = m; 2031 if (N == PETSC_DECIDE) N = n; 2032 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 *A = 0; 2037 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2038 PLogObjectCreate(B); 2039 B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2040 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2041 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2042 2043 B->ops->destroy = MatDestroy_MPIBAIJ; 2044 B->ops->view = MatView_MPIBAIJ; 2045 B->mapping = 0; 2046 B->factor = 0; 2047 B->assembled = PETSC_FALSE; 2048 2049 B->insertmode = NOT_SET_VALUES; 2050 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2051 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2052 2053 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2054 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2055 } 2056 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2057 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2058 } 2059 if (N == PETSC_DECIDE && n == PETSC_DECIDE) { 2060 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2061 } 2062 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2063 if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2064 2065 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2066 work[0] = m; work[1] = n; 2067 mbs = m/bs; nbs = n/bs; 2068 ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2069 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2070 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2071 } 2072 if (m == PETSC_DECIDE) { 2073 Mbs = M/bs; 2074 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2075 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2076 m = mbs*bs; 2077 } 2078 if (n == PETSC_DECIDE) { 2079 Nbs = N/bs; 2080 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2081 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2082 n = nbs*bs; 2083 } 2084 if (mbs*bs != m || nbs*bs != n) { 2085 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2086 } 2087 2088 b->m = m; B->m = m; 2089 b->n = n; B->n = n; 2090 b->N = N; B->N = N; 2091 b->M = M; B->M = M; 2092 b->bs = bs; 2093 b->bs2 = bs*bs; 2094 b->mbs = mbs; 2095 b->nbs = nbs; 2096 b->Mbs = Mbs; 2097 b->Nbs = Nbs; 2098 2099 /* the information in the maps duplicates the information computed below, eventually 2100 we should remove the duplicate information that is not contained in the maps */ 2101 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2102 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2103 2104 /* build local table of row and column ownerships */ 2105 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2106 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2107 b->cowners = b->rowners + b->size + 2; 2108 b->rowners_bs = b->cowners + b->size + 2; 2109 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2110 b->rowners[0] = 0; 2111 for (i=2; i<=b->size; i++) { 2112 b->rowners[i] += b->rowners[i-1]; 2113 } 2114 for (i=0; i<=b->size; i++) { 2115 b->rowners_bs[i] = b->rowners[i]*bs; 2116 } 2117 b->rstart = b->rowners[b->rank]; 2118 b->rend = b->rowners[b->rank+1]; 2119 b->rstart_bs = b->rstart * bs; 2120 b->rend_bs = b->rend * bs; 2121 2122 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2123 b->cowners[0] = 0; 2124 for (i=2; i<=b->size; i++) { 2125 b->cowners[i] += b->cowners[i-1]; 2126 } 2127 b->cstart = b->cowners[b->rank]; 2128 b->cend = b->cowners[b->rank+1]; 2129 b->cstart_bs = b->cstart * bs; 2130 b->cend_bs = b->cend * bs; 2131 2132 2133 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2134 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2135 PLogObjectParent(B,b->A); 2136 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2137 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2138 PLogObjectParent(B,b->B); 2139 2140 /* build cache for off array entries formed */ 2141 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2142 ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2143 b->donotstash = PETSC_FALSE; 2144 b->colmap = PETSC_NULL; 2145 b->garray = PETSC_NULL; 2146 b->roworiented = PETSC_TRUE; 2147 2148 /* stuff used in block assembly */ 2149 b->barray = 0; 2150 2151 /* stuff used for matrix vector multiply */ 2152 b->lvec = 0; 2153 b->Mvctx = 0; 2154 2155 /* stuff for MatGetRow() */ 2156 b->rowindices = 0; 2157 b->rowvalues = 0; 2158 b->getrowactive = PETSC_FALSE; 2159 2160 /* hash table stuff */ 2161 b->ht = 0; 2162 b->hd = 0; 2163 b->ht_size = 0; 2164 b->ht_flag = PETSC_FALSE; 2165 b->ht_fact = 0; 2166 b->ht_total_ct = 0; 2167 b->ht_insert_ct = 0; 2168 2169 *A = B; 2170 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2171 if (flg) { 2172 double fact = 1.39; 2173 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2174 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2175 if (fact <= 1.0) fact = 1.39; 2176 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2177 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2178 } 2179 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2180 "MatStoreValues_MPIBAIJ", 2181 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2182 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2183 "MatRetrieveValues_MPIBAIJ", 2184 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2185 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2186 "MatGetDiagonalBlock_MPIBAIJ", 2187 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2188 PetscFunctionReturn(0); 2189 } 2190 2191 #undef __FUNC__ 2192 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 2193 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2194 { 2195 Mat mat; 2196 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2197 int ierr,len=0; 2198 PetscTruth flg; 2199 2200 PetscFunctionBegin; 2201 *newmat = 0; 2202 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2203 PLogObjectCreate(mat); 2204 mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2205 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2206 mat->ops->destroy = MatDestroy_MPIBAIJ; 2207 mat->ops->view = MatView_MPIBAIJ; 2208 mat->factor = matin->factor; 2209 mat->assembled = PETSC_TRUE; 2210 mat->insertmode = NOT_SET_VALUES; 2211 2212 a->m = mat->m = oldmat->m; 2213 a->n = mat->n = oldmat->n; 2214 a->M = mat->M = oldmat->M; 2215 a->N = mat->N = oldmat->N; 2216 2217 a->bs = oldmat->bs; 2218 a->bs2 = oldmat->bs2; 2219 a->mbs = oldmat->mbs; 2220 a->nbs = oldmat->nbs; 2221 a->Mbs = oldmat->Mbs; 2222 a->Nbs = oldmat->Nbs; 2223 2224 a->rstart = oldmat->rstart; 2225 a->rend = oldmat->rend; 2226 a->cstart = oldmat->cstart; 2227 a->cend = oldmat->cend; 2228 a->size = oldmat->size; 2229 a->rank = oldmat->rank; 2230 a->donotstash = oldmat->donotstash; 2231 a->roworiented = oldmat->roworiented; 2232 a->rowindices = 0; 2233 a->rowvalues = 0; 2234 a->getrowactive = PETSC_FALSE; 2235 a->barray = 0; 2236 a->rstart_bs = oldmat->rstart_bs; 2237 a->rend_bs = oldmat->rend_bs; 2238 a->cstart_bs = oldmat->cstart_bs; 2239 a->cend_bs = oldmat->cend_bs; 2240 2241 /* hash table stuff */ 2242 a->ht = 0; 2243 a->hd = 0; 2244 a->ht_size = 0; 2245 a->ht_flag = oldmat->ht_flag; 2246 a->ht_fact = oldmat->ht_fact; 2247 a->ht_total_ct = 0; 2248 a->ht_insert_ct = 0; 2249 2250 2251 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2252 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2253 a->cowners = a->rowners + a->size + 2; 2254 a->rowners_bs = a->cowners + a->size + 2; 2255 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2256 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2257 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2258 if (oldmat->colmap) { 2259 #if defined (PETSC_USE_CTABLE) 2260 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2261 #else 2262 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2263 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2264 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2265 #endif 2266 } else a->colmap = 0; 2267 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2268 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2269 PLogObjectMemory(mat,len*sizeof(int)); 2270 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2271 } else a->garray = 0; 2272 2273 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2274 PLogObjectParent(mat,a->lvec); 2275 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2276 2277 PLogObjectParent(mat,a->Mvctx); 2278 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2279 PLogObjectParent(mat,a->A); 2280 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2281 PLogObjectParent(mat,a->B); 2282 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2283 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2284 if (flg) { 2285 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2286 } 2287 *newmat = mat; 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #include "sys.h" 2292 2293 #undef __FUNC__ 2294 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 2295 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2296 { 2297 Mat A; 2298 int i,nz,ierr,j,rstart,rend,fd; 2299 Scalar *vals,*buf; 2300 MPI_Comm comm = ((PetscObject)viewer)->comm; 2301 MPI_Status status; 2302 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2303 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2304 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2305 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2306 int dcount,kmax,k,nzcount,tmp; 2307 2308 PetscFunctionBegin; 2309 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2310 2311 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2312 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2313 if (!rank) { 2314 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2315 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2316 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2317 if (header[3] < 0) { 2318 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2319 } 2320 } 2321 2322 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2323 M = header[1]; N = header[2]; 2324 2325 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2326 2327 /* 2328 This code adds extra rows to make sure the number of rows is 2329 divisible by the blocksize 2330 */ 2331 Mbs = M/bs; 2332 extra_rows = bs - M + bs*(Mbs); 2333 if (extra_rows == bs) extra_rows = 0; 2334 else Mbs++; 2335 if (extra_rows &&!rank) { 2336 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2337 } 2338 2339 /* determine ownership of all rows */ 2340 mbs = Mbs/size + ((Mbs % size) > rank); 2341 m = mbs * bs; 2342 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2343 browners = rowners + size + 1; 2344 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2345 rowners[0] = 0; 2346 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2347 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2348 rstart = rowners[rank]; 2349 rend = rowners[rank+1]; 2350 2351 /* distribute row lengths to all processors */ 2352 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2353 if (!rank) { 2354 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2355 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2356 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2357 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2358 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2359 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2360 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2361 } else { 2362 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2363 } 2364 2365 if (!rank) { 2366 /* calculate the number of nonzeros on each processor */ 2367 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2368 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2369 for (i=0; i<size; i++) { 2370 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2371 procsnz[i] += rowlengths[j]; 2372 } 2373 } 2374 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2375 2376 /* determine max buffer needed and allocate it */ 2377 maxnz = 0; 2378 for (i=0; i<size; i++) { 2379 maxnz = PetscMax(maxnz,procsnz[i]); 2380 } 2381 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2382 2383 /* read in my part of the matrix column indices */ 2384 nz = procsnz[0]; 2385 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2386 mycols = ibuf; 2387 if (size == 1) nz -= extra_rows; 2388 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2389 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2390 2391 /* read in every ones (except the last) and ship off */ 2392 for (i=1; i<size-1; i++) { 2393 nz = procsnz[i]; 2394 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2395 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2396 } 2397 /* read in the stuff for the last proc */ 2398 if (size != 1) { 2399 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2400 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2401 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2402 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2403 } 2404 ierr = PetscFree(cols);CHKERRQ(ierr); 2405 } else { 2406 /* determine buffer space needed for message */ 2407 nz = 0; 2408 for (i=0; i<m; i++) { 2409 nz += locrowlens[i]; 2410 } 2411 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2412 mycols = ibuf; 2413 /* receive message of column indices*/ 2414 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2415 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2416 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2417 } 2418 2419 /* loop over local rows, determining number of off diagonal entries */ 2420 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2421 odlens = dlens + (rend-rstart); 2422 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2423 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2424 masked1 = mask + Mbs; 2425 masked2 = masked1 + Mbs; 2426 rowcount = 0; nzcount = 0; 2427 for (i=0; i<mbs; i++) { 2428 dcount = 0; 2429 odcount = 0; 2430 for (j=0; j<bs; j++) { 2431 kmax = locrowlens[rowcount]; 2432 for (k=0; k<kmax; k++) { 2433 tmp = mycols[nzcount++]/bs; 2434 if (!mask[tmp]) { 2435 mask[tmp] = 1; 2436 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2437 else masked1[dcount++] = tmp; 2438 } 2439 } 2440 rowcount++; 2441 } 2442 2443 dlens[i] = dcount; 2444 odlens[i] = odcount; 2445 2446 /* zero out the mask elements we set */ 2447 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2448 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2449 } 2450 2451 /* create our matrix */ 2452 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2453 A = *newmat; 2454 MatSetOption(A,MAT_COLUMNS_SORTED); 2455 2456 if (!rank) { 2457 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2458 /* read in my part of the matrix numerical values */ 2459 nz = procsnz[0]; 2460 vals = buf; 2461 mycols = ibuf; 2462 if (size == 1) nz -= extra_rows; 2463 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2464 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2465 2466 /* insert into matrix */ 2467 jj = rstart*bs; 2468 for (i=0; i<m; i++) { 2469 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2470 mycols += locrowlens[i]; 2471 vals += locrowlens[i]; 2472 jj++; 2473 } 2474 /* read in other processors (except the last one) and ship out */ 2475 for (i=1; i<size-1; i++) { 2476 nz = procsnz[i]; 2477 vals = buf; 2478 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2479 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2480 } 2481 /* the last proc */ 2482 if (size != 1){ 2483 nz = procsnz[i] - extra_rows; 2484 vals = buf; 2485 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2486 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2487 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2488 } 2489 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2490 } else { 2491 /* receive numeric values */ 2492 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2493 2494 /* receive message of values*/ 2495 vals = buf; 2496 mycols = ibuf; 2497 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2498 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2499 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2500 2501 /* insert into matrix */ 2502 jj = rstart*bs; 2503 for (i=0; i<m; i++) { 2504 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2505 mycols += locrowlens[i]; 2506 vals += locrowlens[i]; 2507 jj++; 2508 } 2509 } 2510 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2511 ierr = PetscFree(buf);CHKERRQ(ierr); 2512 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2513 ierr = PetscFree(rowners);CHKERRQ(ierr); 2514 ierr = PetscFree(dlens);CHKERRQ(ierr); 2515 ierr = PetscFree(mask);CHKERRQ(ierr); 2516 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2517 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2518 PetscFunctionReturn(0); 2519 } 2520 2521 #undef __FUNC__ 2522 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2523 /*@ 2524 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2525 2526 Input Parameters: 2527 . mat - the matrix 2528 . fact - factor 2529 2530 Collective on Mat 2531 2532 Level: advanced 2533 2534 Notes: 2535 This can also be set by the command line option: -mat_use_hash_table fact 2536 2537 .keywords: matrix, hashtable, factor, HT 2538 2539 .seealso: MatSetOption() 2540 @*/ 2541 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2542 { 2543 Mat_MPIBAIJ *baij; 2544 2545 PetscFunctionBegin; 2546 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2547 if (mat->type != MATMPIBAIJ) { 2548 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2549 } 2550 baij = (Mat_MPIBAIJ*)mat->data; 2551 baij->ht_fact = fact; 2552 PetscFunctionReturn(0); 2553 } 2554