1 /*$Id: mpibaij.c,v 1.188 2000/04/09 03:09:57 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 extern int MatSetUpMultiply_MPIBAIJ(Mat); 7 extern int DisAssemble_MPIBAIJ(Mat); 8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15 extern int MatPrintHelp_SeqBAIJ(Mat); 16 17 EXTERN_C_BEGIN 18 #undef __FUNC__ 19 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ" 20 int MatStoreValues_MPIBAIJ(Mat mat) 21 { 22 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 23 int ierr; 24 25 PetscFunctionBegin; 26 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 27 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 28 PetscFunctionReturn(0); 29 } 30 EXTERN_C_END 31 32 EXTERN_C_BEGIN 33 #undef __FUNC__ 34 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ" 35 int MatRetrieveValues_MPIBAIJ(Mat mat) 36 { 37 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 38 int ierr; 39 40 PetscFunctionBegin; 41 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 42 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 43 PetscFunctionReturn(0); 44 } 45 EXTERN_C_END 46 47 /* 48 Local utility routine that creates a mapping from the global column 49 number to the local number in the off-diagonal part of the local 50 storage of the matrix. This is done in a non scable way since the 51 length of colmap equals the global matrix length. 52 */ 53 #undef __FUNC__ 54 #define __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private" 55 static int CreateColmap_MPIBAIJ_Private(Mat mat) 56 { 57 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 58 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 59 int nbs = B->nbs,i,bs=B->bs,ierr; 60 61 PetscFunctionBegin; 62 #if defined (PETSC_USE_CTABLE) 63 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 64 for (i=0; i<nbs; i++){ 65 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 66 } 67 #else 68 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 69 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 70 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 71 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 72 #endif 73 PetscFunctionReturn(0); 74 } 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for (_i=low; _i<high; _i++) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 MatScalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 109 \ 110 /* malloc new storage space */ \ 111 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 112 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 113 new_j = (int*)(new_a + bs2*new_nz); \ 114 new_i = new_j + new_nz; \ 115 \ 116 /* copy over old data into new slots */ \ 117 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 118 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 119 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 120 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 121 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 122 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 124 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 125 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 126 /* free up old matrix storage */ \ 127 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 128 if (!a->singlemalloc) { \ 129 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 130 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 131 } \ 132 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 133 a->singlemalloc = PETSC_TRUE; \ 134 \ 135 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 136 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 137 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 138 a->maxnz += bs2*CHUNKSIZE; \ 139 a->reallocs++; \ 140 a->nz++; \ 141 } \ 142 N = nrow++ - 1; \ 143 /* shift up all the later entries in this row */ \ 144 for (ii=N; ii>=_i; ii--) { \ 145 rp[ii+1] = rp[ii]; \ 146 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 } \ 148 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 149 rp[_i] = bcol; \ 150 ap[bs2*_i + bs*cidx + ridx] = value; \ 151 a_noinsert:; \ 152 ailen[brow] = nrow; \ 153 } 154 155 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 156 { \ 157 brow = row/bs; \ 158 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 159 rmax = bimax[brow]; nrow = bilen[brow]; \ 160 bcol = col/bs; \ 161 ridx = row % bs; cidx = col % bs; \ 162 low = 0; high = nrow; \ 163 while (high-low > 3) { \ 164 t = (low+high)/2; \ 165 if (rp[t] > bcol) high = t; \ 166 else low = t; \ 167 } \ 168 for (_i=low; _i<high; _i++) { \ 169 if (rp[_i] > bcol) break; \ 170 if (rp[_i] == bcol) { \ 171 bap = ap + bs2*_i + bs*cidx + ridx; \ 172 if (addv == ADD_VALUES) *bap += value; \ 173 else *bap = value; \ 174 goto b_noinsert; \ 175 } \ 176 } \ 177 if (b->nonew == 1) goto b_noinsert; \ 178 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 179 if (nrow >= rmax) { \ 180 /* there is no extra room in row, therefore enlarge */ \ 181 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 182 MatScalar *new_a; \ 183 \ 184 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 185 \ 186 /* malloc new storage space */ \ 187 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 188 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 189 new_j = (int*)(new_a + bs2*new_nz); \ 190 new_i = new_j + new_nz; \ 191 \ 192 /* copy over old data into new slots */ \ 193 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 194 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 195 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 196 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 197 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 198 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 200 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 201 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 202 /* free up old matrix storage */ \ 203 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 204 if (!b->singlemalloc) { \ 205 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 206 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 207 } \ 208 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 209 b->singlemalloc = PETSC_TRUE; \ 210 \ 211 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 212 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 213 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 214 b->maxnz += bs2*CHUNKSIZE; \ 215 b->reallocs++; \ 216 b->nz++; \ 217 } \ 218 N = nrow++ - 1; \ 219 /* shift up all the later entries in this row */ \ 220 for (ii=N; ii>=_i; ii--) { \ 221 rp[ii+1] = rp[ii]; \ 222 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 } \ 224 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 225 rp[_i] = bcol; \ 226 ap[bs2*_i + bs*cidx + ridx] = value; \ 227 b_noinsert:; \ 228 bilen[brow] = nrow; \ 229 } 230 231 #undef __FUNC__ 232 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ" 233 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 234 { 235 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 236 Scalar value; 237 int ierr,i,j,row,col; 238 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 239 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 240 int cend_orig=baij->cend_bs,bs=baij->bs; 241 242 /* Some Variables required in the macro */ 243 Mat A = baij->A; 244 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 245 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 246 MatScalar *aa=a->a; 247 248 Mat B = baij->B; 249 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 250 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 251 MatScalar *ba=b->a; 252 253 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 254 int low,high,t,ridx,cidx,bs2=a->bs2; 255 MatScalar *ap,*bap; 256 257 PetscFunctionBegin; 258 for (i=0; i<m; i++) { 259 if (im[i] < 0) continue; 260 #if defined(PETSC_USE_BOPT_g) 261 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 262 #endif 263 if (im[i] >= rstart_orig && im[i] < rend_orig) { 264 row = im[i] - rstart_orig; 265 for (j=0; j<n; j++) { 266 if (in[j] >= cstart_orig && in[j] < cend_orig){ 267 col = in[j] - cstart_orig; 268 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 269 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 270 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 271 } else if (in[j] < 0) continue; 272 #if defined(PETSC_USE_BOPT_g) 273 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 274 #endif 275 else { 276 if (mat->was_assembled) { 277 if (!baij->colmap) { 278 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 279 } 280 #if defined (PETSC_USE_CTABLE) 281 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 282 col = col - 1 + in[j]%bs; 283 #else 284 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 285 #endif 286 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 287 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 288 col = in[j]; 289 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 290 B = baij->B; 291 b = (Mat_SeqBAIJ*)(B)->data; 292 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 293 ba=b->a; 294 } 295 } else col = in[j]; 296 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 297 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 298 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 299 } 300 } 301 } else { 302 if (!baij->donotstash) { 303 if (roworiented) { 304 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 305 } else { 306 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 307 } 308 } 309 } 310 } 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNC__ 315 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ" 316 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 317 { 318 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 319 Scalar *value,*barray=baij->barray; 320 int ierr,i,j,ii,jj,row,col; 321 int roworiented = baij->roworiented,rstart=baij->rstart ; 322 int rend=baij->rend,cstart=baij->cstart,stepval; 323 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 324 325 PetscFunctionBegin; 326 if(!barray) { 327 baij->barray = barray = (Scalar*)PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray); 328 } 329 330 if (roworiented) { 331 stepval = (n-1)*bs; 332 } else { 333 stepval = (m-1)*bs; 334 } 335 for (i=0; i<m; i++) { 336 if (im[i] < 0) continue; 337 #if defined(PETSC_USE_BOPT_g) 338 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 339 #endif 340 if (im[i] >= rstart && im[i] < rend) { 341 row = im[i] - rstart; 342 for (j=0; j<n; j++) { 343 /* If NumCol = 1 then a copy is not required */ 344 if ((roworiented) && (n == 1)) { 345 barray = v + i*bs2; 346 } else if((!roworiented) && (m == 1)) { 347 barray = v + j*bs2; 348 } else { /* Here a copy is required */ 349 if (roworiented) { 350 value = v + i*(stepval+bs)*bs + j*bs; 351 } else { 352 value = v + j*(stepval+bs)*bs + i*bs; 353 } 354 for (ii=0; ii<bs; ii++,value+=stepval) { 355 for (jj=0; jj<bs; jj++) { 356 *barray++ = *value++; 357 } 358 } 359 barray -=bs2; 360 } 361 362 if (in[j] >= cstart && in[j] < cend){ 363 col = in[j] - cstart; 364 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 365 } 366 else if (in[j] < 0) continue; 367 #if defined(PETSC_USE_BOPT_g) 368 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 369 #endif 370 else { 371 if (mat->was_assembled) { 372 if (!baij->colmap) { 373 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 374 } 375 376 #if defined(PETSC_USE_BOPT_g) 377 #if defined (PETSC_USE_CTABLE) 378 { int data; 379 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 380 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 381 } 382 #else 383 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 384 #endif 385 #endif 386 #if defined (PETSC_USE_CTABLE) 387 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 388 col = (col - 1)/bs; 389 #else 390 col = (baij->colmap[in[j]] - 1)/bs; 391 #endif 392 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 393 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 394 col = in[j]; 395 } 396 } 397 else col = in[j]; 398 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 399 } 400 } 401 } else { 402 if (!baij->donotstash) { 403 if (roworiented) { 404 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 405 } else { 406 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 407 } 408 } 409 } 410 } 411 PetscFunctionReturn(0); 412 } 413 #define HASH_KEY 0.6180339887 414 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 415 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 416 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 417 #undef __FUNC__ 418 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT" 419 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 420 { 421 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 422 int ierr,i,j,row,col; 423 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 424 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 425 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 426 PetscReal tmp; 427 MatScalar ** HD = baij->hd,value; 428 #if defined(PETSC_USE_BOPT_g) 429 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 430 #endif 431 432 PetscFunctionBegin; 433 434 for (i=0; i<m; i++) { 435 #if defined(PETSC_USE_BOPT_g) 436 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 437 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 438 #endif 439 row = im[i]; 440 if (row >= rstart_orig && row < rend_orig) { 441 for (j=0; j<n; j++) { 442 col = in[j]; 443 if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m]; 444 /* Look up into the Hash Table */ 445 key = (row/bs)*Nbs+(col/bs)+1; 446 h1 = HASH(size,key,tmp); 447 448 449 idx = h1; 450 #if defined(PETSC_USE_BOPT_g) 451 insert_ct++; 452 total_ct++; 453 if (HT[idx] != key) { 454 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 455 if (idx == size) { 456 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 457 if (idx == h1) { 458 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 459 } 460 } 461 } 462 #else 463 if (HT[idx] != key) { 464 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 465 if (idx == size) { 466 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 467 if (idx == h1) { 468 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 469 } 470 } 471 } 472 #endif 473 /* A HASH table entry is found, so insert the values at the correct address */ 474 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 475 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 476 } 477 } else { 478 if (!baij->donotstash) { 479 if (roworiented) { 480 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 481 } else { 482 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 483 } 484 } 485 } 486 } 487 #if defined(PETSC_USE_BOPT_g) 488 baij->ht_total_ct = total_ct; 489 baij->ht_insert_ct = insert_ct; 490 #endif 491 PetscFunctionReturn(0); 492 } 493 494 #undef __FUNC__ 495 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 496 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 497 { 498 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 499 int ierr,i,j,ii,jj,row,col; 500 int roworiented = baij->roworiented,rstart=baij->rstart ; 501 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 502 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 503 PetscReal tmp; 504 MatScalar **HD = baij->hd,*baij_a; 505 Scalar *v_t,*value; 506 #if defined(PETSC_USE_BOPT_g) 507 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 508 #endif 509 510 PetscFunctionBegin; 511 512 if (roworiented) { 513 stepval = (n-1)*bs; 514 } else { 515 stepval = (m-1)*bs; 516 } 517 for (i=0; i<m; i++) { 518 #if defined(PETSC_USE_BOPT_g) 519 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 520 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 521 #endif 522 row = im[i]; 523 v_t = v + i*bs2; 524 if (row >= rstart && row < rend) { 525 for (j=0; j<n; j++) { 526 col = in[j]; 527 528 /* Look up into the Hash Table */ 529 key = row*Nbs+col+1; 530 h1 = HASH(size,key,tmp); 531 532 idx = h1; 533 #if defined(PETSC_USE_BOPT_g) 534 total_ct++; 535 insert_ct++; 536 if (HT[idx] != key) { 537 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 538 if (idx == size) { 539 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 540 if (idx == h1) { 541 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 542 } 543 } 544 } 545 #else 546 if (HT[idx] != key) { 547 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 548 if (idx == size) { 549 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 550 if (idx == h1) { 551 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 552 } 553 } 554 } 555 #endif 556 baij_a = HD[idx]; 557 if (roworiented) { 558 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 559 /* value = v + (i*(stepval+bs)+j)*bs; */ 560 value = v_t; 561 v_t += bs; 562 if (addv == ADD_VALUES) { 563 for (ii=0; ii<bs; ii++,value+=stepval) { 564 for (jj=ii; jj<bs2; jj+=bs) { 565 baij_a[jj] += *value++; 566 } 567 } 568 } else { 569 for (ii=0; ii<bs; ii++,value+=stepval) { 570 for (jj=ii; jj<bs2; jj+=bs) { 571 baij_a[jj] = *value++; 572 } 573 } 574 } 575 } else { 576 value = v + j*(stepval+bs)*bs + i*bs; 577 if (addv == ADD_VALUES) { 578 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 579 for (jj=0; jj<bs; jj++) { 580 baij_a[jj] += *value++; 581 } 582 } 583 } else { 584 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 585 for (jj=0; jj<bs; jj++) { 586 baij_a[jj] = *value++; 587 } 588 } 589 } 590 } 591 } 592 } else { 593 if (!baij->donotstash) { 594 if (roworiented) { 595 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 596 } else { 597 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 598 } 599 } 600 } 601 } 602 #if defined(PETSC_USE_BOPT_g) 603 baij->ht_total_ct = total_ct; 604 baij->ht_insert_ct = insert_ct; 605 #endif 606 PetscFunctionReturn(0); 607 } 608 609 #undef __FUNC__ 610 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ" 611 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 612 { 613 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 614 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 615 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 616 617 PetscFunctionBegin; 618 for (i=0; i<m; i++) { 619 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 620 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 621 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 622 row = idxm[i] - bsrstart; 623 for (j=0; j<n; j++) { 624 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 625 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 626 if (idxn[j] >= bscstart && idxn[j] < bscend){ 627 col = idxn[j] - bscstart; 628 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 629 } else { 630 if (!baij->colmap) { 631 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 632 } 633 #if defined (PETSC_USE_CTABLE) 634 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 635 data --; 636 #else 637 data = baij->colmap[idxn[j]/bs]-1; 638 #endif 639 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 640 else { 641 col = data + idxn[j]%bs; 642 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 643 } 644 } 645 } 646 } else { 647 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 648 } 649 } 650 PetscFunctionReturn(0); 651 } 652 653 #undef __FUNC__ 654 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ" 655 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 656 { 657 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 658 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 659 int ierr,i,bs2=baij->bs2; 660 PetscReal sum = 0.0; 661 MatScalar *v; 662 663 PetscFunctionBegin; 664 if (baij->size == 1) { 665 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 666 } else { 667 if (type == NORM_FROBENIUS) { 668 v = amat->a; 669 for (i=0; i<amat->nz*bs2; i++) { 670 #if defined(PETSC_USE_COMPLEX) 671 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 672 #else 673 sum += (*v)*(*v); v++; 674 #endif 675 } 676 v = bmat->a; 677 for (i=0; i<bmat->nz*bs2; i++) { 678 #if defined(PETSC_USE_COMPLEX) 679 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 680 #else 681 sum += (*v)*(*v); v++; 682 #endif 683 } 684 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 685 *norm = sqrt(*norm); 686 } else { 687 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 688 } 689 } 690 PetscFunctionReturn(0); 691 } 692 693 694 /* 695 Creates the hash table, and sets the table 696 This table is created only once. 697 If new entried need to be added to the matrix 698 then the hash table has to be destroyed and 699 recreated. 700 */ 701 #undef __FUNC__ 702 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 703 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 704 { 705 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 706 Mat A = baij->A,B=baij->B; 707 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 708 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 709 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 710 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 711 int *HT,key; 712 MatScalar **HD; 713 PetscReal tmp; 714 #if defined(PETSC_USE_BOPT_g) 715 int ct=0,max=0; 716 #endif 717 718 PetscFunctionBegin; 719 baij->ht_size=(int)(factor*nz); 720 size = baij->ht_size; 721 722 if (baij->ht) { 723 PetscFunctionReturn(0); 724 } 725 726 /* Allocate Memory for Hash Table */ 727 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 728 baij->ht = (int*)(baij->hd + size); 729 HD = baij->hd; 730 HT = baij->ht; 731 732 733 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 734 735 736 /* Loop Over A */ 737 for (i=0; i<a->mbs; i++) { 738 for (j=ai[i]; j<ai[i+1]; j++) { 739 row = i+rstart; 740 col = aj[j]+cstart; 741 742 key = row*Nbs + col + 1; 743 h1 = HASH(size,key,tmp); 744 for (k=0; k<size; k++){ 745 if (HT[(h1+k)%size] == 0.0) { 746 HT[(h1+k)%size] = key; 747 HD[(h1+k)%size] = a->a + j*bs2; 748 break; 749 #if defined(PETSC_USE_BOPT_g) 750 } else { 751 ct++; 752 #endif 753 } 754 } 755 #if defined(PETSC_USE_BOPT_g) 756 if (k> max) max = k; 757 #endif 758 } 759 } 760 /* Loop Over B */ 761 for (i=0; i<b->mbs; i++) { 762 for (j=bi[i]; j<bi[i+1]; j++) { 763 row = i+rstart; 764 col = garray[bj[j]]; 765 key = row*Nbs + col + 1; 766 h1 = HASH(size,key,tmp); 767 for (k=0; k<size; k++){ 768 if (HT[(h1+k)%size] == 0.0) { 769 HT[(h1+k)%size] = key; 770 HD[(h1+k)%size] = b->a + j*bs2; 771 break; 772 #if defined(PETSC_USE_BOPT_g) 773 } else { 774 ct++; 775 #endif 776 } 777 } 778 #if defined(PETSC_USE_BOPT_g) 779 if (k> max) max = k; 780 #endif 781 } 782 } 783 784 /* Print Summary */ 785 #if defined(PETSC_USE_BOPT_g) 786 for (i=0,j=0; i<size; i++) { 787 if (HT[i]) {j++;} 788 } 789 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 790 #endif 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNC__ 795 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 796 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 797 { 798 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 799 int ierr,nstash,reallocs; 800 InsertMode addv; 801 802 PetscFunctionBegin; 803 if (baij->donotstash) { 804 PetscFunctionReturn(0); 805 } 806 807 /* make sure all processors are either in INSERTMODE or ADDMODE */ 808 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 809 if (addv == (ADD_VALUES|INSERT_VALUES)) { 810 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 811 } 812 mat->insertmode = addv; /* in case this processor had no cache */ 813 814 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 815 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 816 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 817 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 818 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 819 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNC__ 824 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 825 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 826 { 827 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 828 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 829 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 830 int *row,*col,other_disassembled; 831 PetscTruth r1,r2,r3; 832 MatScalar *val; 833 InsertMode addv = mat->insertmode; 834 835 PetscFunctionBegin; 836 if (!baij->donotstash) { 837 while (1) { 838 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 839 if (!flg) break; 840 841 for (i=0; i<n;) { 842 /* Now identify the consecutive vals belonging to the same row */ 843 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 844 if (j < n) ncols = j-i; 845 else ncols = n-i; 846 /* Now assemble all these values with a single function call */ 847 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 848 i = j; 849 } 850 } 851 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 852 /* Now process the block-stash. Since the values are stashed column-oriented, 853 set the roworiented flag to column oriented, and after MatSetValues() 854 restore the original flags */ 855 r1 = baij->roworiented; 856 r2 = a->roworiented; 857 r3 = b->roworiented; 858 baij->roworiented = PETSC_FALSE; 859 a->roworiented = PETSC_FALSE; 860 b->roworiented = PETSC_FALSE; 861 while (1) { 862 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 863 if (!flg) break; 864 865 for (i=0; i<n;) { 866 /* Now identify the consecutive vals belonging to the same row */ 867 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 868 if (j < n) ncols = j-i; 869 else ncols = n-i; 870 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 871 i = j; 872 } 873 } 874 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 875 baij->roworiented = r1; 876 a->roworiented = r2; 877 b->roworiented = r3; 878 } 879 880 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 881 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 882 883 /* determine if any processor has disassembled, if so we must 884 also disassemble ourselfs, in order that we may reassemble. */ 885 /* 886 if nonzero structure of submatrix B cannot change then we know that 887 no processor disassembled thus we can skip this stuff 888 */ 889 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 890 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 891 if (mat->was_assembled && !other_disassembled) { 892 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 893 } 894 } 895 896 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 897 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 898 } 899 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 900 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 901 902 #if defined(PETSC_USE_BOPT_g) 903 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 904 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 905 baij->ht_total_ct = 0; 906 baij->ht_insert_ct = 0; 907 } 908 #endif 909 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 910 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 911 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 912 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 913 } 914 915 if (baij->rowvalues) { 916 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 917 baij->rowvalues = 0; 918 } 919 PetscFunctionReturn(0); 920 } 921 922 /* 923 #undef __FUNC__ 924 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_Binary" 925 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 926 { 927 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 928 int ierr; 929 930 PetscFunctionBegin; 931 if (baij->size == 1) { 932 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 933 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 934 PetscFunctionReturn(0); 935 } 936 */ 937 938 #undef __FUNC__ 939 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket" 940 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 941 { 942 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 943 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 944 PetscTruth isascii,isdraw; 945 946 PetscFunctionBegin; 947 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 948 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 949 if (isascii) { 950 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 951 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 952 MatInfo info; 953 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 954 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 955 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 956 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 957 baij->bs,(int)info.memory);CHKERRQ(ierr); 958 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 959 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 960 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 961 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 962 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 963 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 964 PetscFunctionReturn(0); 965 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 966 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 } 970 971 if (isdraw) { 972 Draw draw; 973 PetscTruth isnull; 974 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 975 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 976 } 977 978 if (size == 1) { 979 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 980 } else { 981 /* assemble the entire matrix onto first processor. */ 982 Mat A; 983 Mat_SeqBAIJ *Aloc; 984 int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 985 MatScalar *a; 986 #if defined(PETSC_USE_MAT_SINGLE) 987 Scalar *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar)); 988 int l; 989 #endif 990 991 if (!rank) { 992 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 993 } else { 994 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 995 } 996 PLogObjectParent(mat,A); 997 998 /* copy over the A part */ 999 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1000 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1001 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1002 1003 for (i=0; i<mbs; i++) { 1004 rvals[0] = bs*(baij->rstart + i); 1005 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1006 for (j=ai[i]; j<ai[i+1]; j++) { 1007 col = (baij->cstart+aj[j])*bs; 1008 for (k=0; k<bs; k++) { 1009 #if defined(PETSC_USE_MAT_SINGLE) 1010 for (l=0; l<bs; l++) atmp[l] = a[l]; 1011 #else 1012 atmp = a; 1013 #endif 1014 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,atmp,INSERT_VALUES);CHKERRQ(ierr); 1015 col++; a += bs; 1016 } 1017 } 1018 } 1019 /* copy over the B part */ 1020 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1021 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1022 for (i=0; i<mbs; i++) { 1023 rvals[0] = bs*(baij->rstart + i); 1024 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1025 for (j=ai[i]; j<ai[i+1]; j++) { 1026 col = baij->garray[aj[j]]*bs; 1027 for (k=0; k<bs; k++) { 1028 #if defined(PETSC_USE_MAT_SINGLE) 1029 for (l=0; l<bs; l++) atmp[l] = a[l]; 1030 #else 1031 atmp = a; 1032 #endif 1033 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,atmp,INSERT_VALUES);CHKERRQ(ierr); 1034 col++; a += bs; 1035 } 1036 } 1037 } 1038 #if defined(PETSC_USE_MAT_SINGLE) 1039 ierr = PetscFree(atmp);CHKERRQ(ierr); 1040 #endif 1041 ierr = PetscFree(rvals);CHKERRQ(ierr); 1042 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1044 /* 1045 Everyone has to call to draw the matrix since the graphics waits are 1046 synchronized across all processors that share the Draw object 1047 */ 1048 if (!rank) { 1049 Viewer sviewer; 1050 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1051 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1052 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1053 } 1054 ierr = MatDestroy(A);CHKERRQ(ierr); 1055 } 1056 PetscFunctionReturn(0); 1057 } 1058 1059 1060 1061 #undef __FUNC__ 1062 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ" 1063 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1064 { 1065 int ierr; 1066 PetscTruth isascii,isdraw,issocket,isbinary; 1067 1068 PetscFunctionBegin; 1069 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1070 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1071 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1072 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1073 if (isascii || isdraw || issocket || isbinary) { 1074 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1075 } else { 1076 SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1077 } 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNC__ 1082 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ" 1083 int MatDestroy_MPIBAIJ(Mat mat) 1084 { 1085 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1086 int ierr; 1087 1088 PetscFunctionBegin; 1089 1090 if (mat->mapping) { 1091 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1092 } 1093 if (mat->bmapping) { 1094 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1095 } 1096 if (mat->rmap) { 1097 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1098 } 1099 if (mat->cmap) { 1100 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1101 } 1102 #if defined(PETSC_USE_LOG) 1103 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 1104 #endif 1105 1106 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1107 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1108 1109 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1110 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1111 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1112 #if defined (PETSC_USE_CTABLE) 1113 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1114 #else 1115 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1116 #endif 1117 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1118 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1119 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1120 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1121 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1122 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1123 ierr = PetscFree(baij);CHKERRQ(ierr); 1124 PLogObjectDestroy(mat); 1125 PetscHeaderDestroy(mat); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNC__ 1130 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1131 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1132 { 1133 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1134 int ierr,nt; 1135 1136 PetscFunctionBegin; 1137 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1138 if (nt != a->n) { 1139 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1140 } 1141 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1142 if (nt != a->m) { 1143 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1144 } 1145 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1146 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1147 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1148 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1149 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNC__ 1154 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1155 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1156 { 1157 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1158 int ierr; 1159 1160 PetscFunctionBegin; 1161 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1162 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1163 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1164 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNC__ 1169 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ" 1170 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1171 { 1172 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1173 int ierr; 1174 1175 PetscFunctionBegin; 1176 /* do nondiagonal part */ 1177 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1178 /* send it on its way */ 1179 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1180 /* do local part */ 1181 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1182 /* receive remote parts: note this assumes the values are not actually */ 1183 /* inserted in yy until the next line, which is true for my implementation*/ 1184 /* but is not perhaps always true. */ 1185 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNC__ 1190 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ" 1191 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1192 { 1193 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1194 int ierr; 1195 1196 PetscFunctionBegin; 1197 /* do nondiagonal part */ 1198 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1199 /* send it on its way */ 1200 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1201 /* do local part */ 1202 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1203 /* receive remote parts: note this assumes the values are not actually */ 1204 /* inserted in yy until the next line, which is true for my implementation*/ 1205 /* but is not perhaps always true. */ 1206 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 /* 1211 This only works correctly for square matrices where the subblock A->A is the 1212 diagonal block 1213 */ 1214 #undef __FUNC__ 1215 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ" 1216 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1217 { 1218 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1219 int ierr; 1220 1221 PetscFunctionBegin; 1222 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1223 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNC__ 1228 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ" 1229 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1230 { 1231 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1232 int ierr; 1233 1234 PetscFunctionBegin; 1235 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1236 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNC__ 1241 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ" 1242 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1243 { 1244 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1245 1246 PetscFunctionBegin; 1247 if (m) *m = mat->M; 1248 if (n) *n = mat->N; 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNC__ 1253 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ" 1254 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1255 { 1256 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1257 1258 PetscFunctionBegin; 1259 *m = mat->m; *n = mat->n; 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNC__ 1264 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ" 1265 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1266 { 1267 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1268 1269 PetscFunctionBegin; 1270 if (m) *m = mat->rstart*mat->bs; 1271 if (n) *n = mat->rend*mat->bs; 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNC__ 1276 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ" 1277 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1278 { 1279 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1280 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1281 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1282 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1283 int *cmap,*idx_p,cstart = mat->cstart; 1284 1285 PetscFunctionBegin; 1286 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1287 mat->getrowactive = PETSC_TRUE; 1288 1289 if (!mat->rowvalues && (idx || v)) { 1290 /* 1291 allocate enough space to hold information from the longest row. 1292 */ 1293 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1294 int max = 1,mbs = mat->mbs,tmp; 1295 for (i=0; i<mbs; i++) { 1296 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1297 if (max < tmp) { max = tmp; } 1298 } 1299 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1300 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1301 } 1302 1303 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1304 lrow = row - brstart; 1305 1306 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1307 if (!v) {pvA = 0; pvB = 0;} 1308 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1309 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1310 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1311 nztot = nzA + nzB; 1312 1313 cmap = mat->garray; 1314 if (v || idx) { 1315 if (nztot) { 1316 /* Sort by increasing column numbers, assuming A and B already sorted */ 1317 int imark = -1; 1318 if (v) { 1319 *v = v_p = mat->rowvalues; 1320 for (i=0; i<nzB; i++) { 1321 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1322 else break; 1323 } 1324 imark = i; 1325 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1326 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1327 } 1328 if (idx) { 1329 *idx = idx_p = mat->rowindices; 1330 if (imark > -1) { 1331 for (i=0; i<imark; i++) { 1332 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1333 } 1334 } else { 1335 for (i=0; i<nzB; i++) { 1336 if (cmap[cworkB[i]/bs] < cstart) 1337 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1338 else break; 1339 } 1340 imark = i; 1341 } 1342 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1343 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1344 } 1345 } else { 1346 if (idx) *idx = 0; 1347 if (v) *v = 0; 1348 } 1349 } 1350 *nz = nztot; 1351 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1352 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNC__ 1357 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ" 1358 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1359 { 1360 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1361 1362 PetscFunctionBegin; 1363 if (baij->getrowactive == PETSC_FALSE) { 1364 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1365 } 1366 baij->getrowactive = PETSC_FALSE; 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNC__ 1371 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ" 1372 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1373 { 1374 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1375 1376 PetscFunctionBegin; 1377 *bs = baij->bs; 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNC__ 1382 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ" 1383 int MatZeroEntries_MPIBAIJ(Mat A) 1384 { 1385 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1386 int ierr; 1387 1388 PetscFunctionBegin; 1389 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1390 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 1394 #undef __FUNC__ 1395 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ" 1396 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1397 { 1398 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1399 Mat A = a->A,B = a->B; 1400 int ierr; 1401 PetscReal isend[5],irecv[5]; 1402 1403 PetscFunctionBegin; 1404 info->block_size = (double)a->bs; 1405 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1406 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1407 isend[3] = info->memory; isend[4] = info->mallocs; 1408 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1409 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1410 isend[3] += info->memory; isend[4] += info->mallocs; 1411 if (flag == MAT_LOCAL) { 1412 info->nz_used = isend[0]; 1413 info->nz_allocated = isend[1]; 1414 info->nz_unneeded = isend[2]; 1415 info->memory = isend[3]; 1416 info->mallocs = isend[4]; 1417 } else if (flag == MAT_GLOBAL_MAX) { 1418 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1419 info->nz_used = irecv[0]; 1420 info->nz_allocated = irecv[1]; 1421 info->nz_unneeded = irecv[2]; 1422 info->memory = irecv[3]; 1423 info->mallocs = irecv[4]; 1424 } else if (flag == MAT_GLOBAL_SUM) { 1425 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1426 info->nz_used = irecv[0]; 1427 info->nz_allocated = irecv[1]; 1428 info->nz_unneeded = irecv[2]; 1429 info->memory = irecv[3]; 1430 info->mallocs = irecv[4]; 1431 } else { 1432 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1433 } 1434 info->rows_global = (double)a->M; 1435 info->columns_global = (double)a->N; 1436 info->rows_local = (double)a->m; 1437 info->columns_local = (double)a->N; 1438 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1439 info->fill_ratio_needed = 0; 1440 info->factor_mallocs = 0; 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNC__ 1445 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1446 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1447 { 1448 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1449 int ierr; 1450 1451 PetscFunctionBegin; 1452 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1453 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1454 op == MAT_COLUMNS_UNSORTED || 1455 op == MAT_COLUMNS_SORTED || 1456 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1457 op == MAT_KEEP_ZEROED_ROWS || 1458 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1459 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1460 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1461 } else if (op == MAT_ROW_ORIENTED) { 1462 a->roworiented = PETSC_TRUE; 1463 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1464 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1465 } else if (op == MAT_ROWS_SORTED || 1466 op == MAT_ROWS_UNSORTED || 1467 op == MAT_SYMMETRIC || 1468 op == MAT_STRUCTURALLY_SYMMETRIC || 1469 op == MAT_YES_NEW_DIAGONALS || 1470 op == MAT_USE_HASH_TABLE) { 1471 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1472 } else if (op == MAT_COLUMN_ORIENTED) { 1473 a->roworiented = PETSC_FALSE; 1474 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1475 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1476 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1477 a->donotstash = PETSC_TRUE; 1478 } else if (op == MAT_NO_NEW_DIAGONALS) { 1479 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1480 } else if (op == MAT_USE_HASH_TABLE) { 1481 a->ht_flag = PETSC_TRUE; 1482 } else { 1483 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNC__ 1489 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1490 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1491 { 1492 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1493 Mat_SeqBAIJ *Aloc; 1494 Mat B; 1495 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1496 int bs=baij->bs,mbs=baij->mbs; 1497 MatScalar *a; 1498 #if defined(PETSC_USE_MAT_SINGLE) 1499 Scalar *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar)); 1500 int l; 1501 #endif 1502 1503 PetscFunctionBegin; 1504 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1505 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1506 1507 /* copy over the A part */ 1508 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1509 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1510 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1511 1512 for (i=0; i<mbs; i++) { 1513 rvals[0] = bs*(baij->rstart + i); 1514 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1515 for (j=ai[i]; j<ai[i+1]; j++) { 1516 col = (baij->cstart+aj[j])*bs; 1517 for (k=0; k<bs; k++) { 1518 #if defined(PETSC_USE_MAT_SINGLE) 1519 for (l=0; l<bs; l++) atmp[l] = a[l]; 1520 #else 1521 atmp = a; 1522 #endif 1523 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,atmp,INSERT_VALUES);CHKERRQ(ierr); 1524 col++; a += bs; 1525 } 1526 } 1527 } 1528 /* copy over the B part */ 1529 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1530 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1531 for (i=0; i<mbs; i++) { 1532 rvals[0] = bs*(baij->rstart + i); 1533 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1534 for (j=ai[i]; j<ai[i+1]; j++) { 1535 col = baij->garray[aj[j]]*bs; 1536 for (k=0; k<bs; k++) { 1537 #if defined(PETSC_USE_MAT_SINGLE) 1538 for (l=0; l<bs; l++) atmp[l] = a[l]; 1539 #else 1540 atmp = a; 1541 #endif 1542 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,atmp,INSERT_VALUES);CHKERRQ(ierr); 1543 col++; a += bs; 1544 } 1545 } 1546 } 1547 #if defined(PETSC_USE_MAT_SINGLE) 1548 ierr = PetscFree(atmp);CHKERRQ(ierr); 1549 #endif 1550 ierr = PetscFree(rvals);CHKERRQ(ierr); 1551 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1552 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 1554 if (matout) { 1555 *matout = B; 1556 } else { 1557 PetscOps *Abops; 1558 MatOps Aops; 1559 1560 /* This isn't really an in-place transpose .... but free data structures from baij */ 1561 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1562 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1563 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1564 #if defined (PETSC_USE_CTABLE) 1565 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1566 #else 1567 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1568 #endif 1569 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1570 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1571 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1572 ierr = PetscFree(baij);CHKERRQ(ierr); 1573 1574 /* 1575 This is horrible, horrible code. We need to keep the 1576 A pointers for the bops and ops but copy everything 1577 else from C. 1578 */ 1579 Abops = A->bops; 1580 Aops = A->ops; 1581 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1582 A->bops = Abops; 1583 A->ops = Aops; 1584 1585 PetscHeaderDestroy(B); 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNC__ 1591 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ" 1592 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1593 { 1594 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1595 Mat a = baij->A,b = baij->B; 1596 int ierr,s1,s2,s3; 1597 1598 PetscFunctionBegin; 1599 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1600 if (rr) { 1601 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1602 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1603 /* Overlap communication with computation. */ 1604 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1605 } 1606 if (ll) { 1607 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1608 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1609 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1610 } 1611 /* scale the diagonal block */ 1612 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1613 1614 if (rr) { 1615 /* Do a scatter end and then right scale the off-diagonal block */ 1616 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1617 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1618 } 1619 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNC__ 1624 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ" 1625 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1626 { 1627 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1628 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1629 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1630 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1631 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1632 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1633 MPI_Comm comm = A->comm; 1634 MPI_Request *send_waits,*recv_waits; 1635 MPI_Status recv_status,*send_status; 1636 IS istmp; 1637 1638 PetscFunctionBegin; 1639 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1640 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1641 1642 /* first count number of contributors to each processor */ 1643 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1644 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1645 procs = nprocs + size; 1646 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1647 for (i=0; i<N; i++) { 1648 idx = rows[i]; 1649 found = 0; 1650 for (j=0; j<size; j++) { 1651 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1652 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1653 } 1654 } 1655 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1656 } 1657 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1658 1659 /* inform other processors of number of messages and max length*/ 1660 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1661 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1662 nmax = work[rank]; 1663 nrecvs = work[size+rank]; 1664 ierr = PetscFree(work);CHKERRQ(ierr); 1665 1666 /* post receives: */ 1667 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1668 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1669 for (i=0; i<nrecvs; i++) { 1670 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1671 } 1672 1673 /* do sends: 1674 1) starts[i] gives the starting index in svalues for stuff going to 1675 the ith processor 1676 */ 1677 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1678 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1679 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1680 starts[0] = 0; 1681 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1682 for (i=0; i<N; i++) { 1683 svalues[starts[owner[i]]++] = rows[i]; 1684 } 1685 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1686 1687 starts[0] = 0; 1688 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1689 count = 0; 1690 for (i=0; i<size; i++) { 1691 if (procs[i]) { 1692 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1693 } 1694 } 1695 ierr = PetscFree(starts);CHKERRQ(ierr); 1696 1697 base = owners[rank]*bs; 1698 1699 /* wait on receives */ 1700 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1701 source = lens + nrecvs; 1702 count = nrecvs; slen = 0; 1703 while (count) { 1704 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1705 /* unpack receives into our local space */ 1706 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1707 source[imdex] = recv_status.MPI_SOURCE; 1708 lens[imdex] = n; 1709 slen += n; 1710 count--; 1711 } 1712 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1713 1714 /* move the data into the send scatter */ 1715 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1716 count = 0; 1717 for (i=0; i<nrecvs; i++) { 1718 values = rvalues + i*nmax; 1719 for (j=0; j<lens[i]; j++) { 1720 lrows[count++] = values[j] - base; 1721 } 1722 } 1723 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1724 ierr = PetscFree(lens);CHKERRQ(ierr); 1725 ierr = PetscFree(owner);CHKERRQ(ierr); 1726 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1727 1728 /* actually zap the local rows */ 1729 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1730 PLogObjectParent(A,istmp); 1731 1732 /* 1733 Zero the required rows. If the "diagonal block" of the matrix 1734 is square and the user wishes to set the diagonal we use seperate 1735 code so that MatSetValues() is not called for each diagonal allocating 1736 new memory, thus calling lots of mallocs and slowing things down. 1737 1738 Contributed by: Mathew Knepley 1739 */ 1740 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1741 ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr); 1742 if (diag && (l->A->M == l->A->N)) { 1743 ierr = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1744 } else if (diag) { 1745 ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1746 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1747 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1748 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1749 } 1750 for (i = 0; i < slen; i++) { 1751 row = lrows[i] + rstart_bs; 1752 ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1753 } 1754 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1755 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1756 } else { 1757 ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr); 1758 } 1759 1760 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1761 ierr = PetscFree(lrows);CHKERRQ(ierr); 1762 1763 /* wait on sends */ 1764 if (nsends) { 1765 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1766 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1767 ierr = PetscFree(send_status);CHKERRQ(ierr); 1768 } 1769 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1770 ierr = PetscFree(svalues);CHKERRQ(ierr); 1771 1772 PetscFunctionReturn(0); 1773 } 1774 1775 #undef __FUNC__ 1776 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ" 1777 int MatPrintHelp_MPIBAIJ(Mat A) 1778 { 1779 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1780 MPI_Comm comm = A->comm; 1781 static int called = 0; 1782 int ierr; 1783 1784 PetscFunctionBegin; 1785 if (!a->rank) { 1786 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1787 } 1788 if (called) {PetscFunctionReturn(0);} else called = 1; 1789 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1790 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNC__ 1795 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1796 int MatSetUnfactored_MPIBAIJ(Mat A) 1797 { 1798 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1799 int ierr; 1800 1801 PetscFunctionBegin; 1802 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1803 PetscFunctionReturn(0); 1804 } 1805 1806 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1807 1808 #undef __FUNC__ 1809 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 1810 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1811 { 1812 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1813 Mat a,b,c,d; 1814 PetscTruth flg; 1815 int ierr; 1816 1817 PetscFunctionBegin; 1818 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1819 a = matA->A; b = matA->B; 1820 c = matB->A; d = matB->B; 1821 1822 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1823 if (flg == PETSC_TRUE) { 1824 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1825 } 1826 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1827 PetscFunctionReturn(0); 1828 } 1829 1830 /* -------------------------------------------------------------------*/ 1831 static struct _MatOps MatOps_Values = { 1832 MatSetValues_MPIBAIJ, 1833 MatGetRow_MPIBAIJ, 1834 MatRestoreRow_MPIBAIJ, 1835 MatMult_MPIBAIJ, 1836 MatMultAdd_MPIBAIJ, 1837 MatMultTranspose_MPIBAIJ, 1838 MatMultTransposeAdd_MPIBAIJ, 1839 0, 1840 0, 1841 0, 1842 0, 1843 0, 1844 0, 1845 0, 1846 MatTranspose_MPIBAIJ, 1847 MatGetInfo_MPIBAIJ, 1848 MatEqual_MPIBAIJ, 1849 MatGetDiagonal_MPIBAIJ, 1850 MatDiagonalScale_MPIBAIJ, 1851 MatNorm_MPIBAIJ, 1852 MatAssemblyBegin_MPIBAIJ, 1853 MatAssemblyEnd_MPIBAIJ, 1854 0, 1855 MatSetOption_MPIBAIJ, 1856 MatZeroEntries_MPIBAIJ, 1857 MatZeroRows_MPIBAIJ, 1858 0, 1859 0, 1860 0, 1861 0, 1862 MatGetSize_MPIBAIJ, 1863 MatGetLocalSize_MPIBAIJ, 1864 MatGetOwnershipRange_MPIBAIJ, 1865 0, 1866 0, 1867 0, 1868 0, 1869 MatDuplicate_MPIBAIJ, 1870 0, 1871 0, 1872 0, 1873 0, 1874 0, 1875 MatGetSubMatrices_MPIBAIJ, 1876 MatIncreaseOverlap_MPIBAIJ, 1877 MatGetValues_MPIBAIJ, 1878 0, 1879 MatPrintHelp_MPIBAIJ, 1880 MatScale_MPIBAIJ, 1881 0, 1882 0, 1883 0, 1884 MatGetBlockSize_MPIBAIJ, 1885 0, 1886 0, 1887 0, 1888 0, 1889 0, 1890 0, 1891 MatSetUnfactored_MPIBAIJ, 1892 0, 1893 MatSetValuesBlocked_MPIBAIJ, 1894 0, 1895 0, 1896 0, 1897 MatGetMaps_Petsc}; 1898 1899 1900 EXTERN_C_BEGIN 1901 #undef __FUNC__ 1902 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 1903 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1904 { 1905 PetscFunctionBegin; 1906 *a = ((Mat_MPIBAIJ *)A->data)->A; 1907 *iscopy = PETSC_FALSE; 1908 PetscFunctionReturn(0); 1909 } 1910 EXTERN_C_END 1911 1912 #undef __FUNC__ 1913 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 1914 /*@C 1915 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1916 (block compressed row). For good matrix assembly performance 1917 the user should preallocate the matrix storage by setting the parameters 1918 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1919 performance can be increased by more than a factor of 50. 1920 1921 Collective on MPI_Comm 1922 1923 Input Parameters: 1924 + comm - MPI communicator 1925 . bs - size of blockk 1926 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1927 This value should be the same as the local size used in creating the 1928 y vector for the matrix-vector product y = Ax. 1929 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1930 This value should be the same as the local size used in creating the 1931 x vector for the matrix-vector product y = Ax. 1932 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1933 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1934 . d_nz - number of block nonzeros per block row in diagonal portion of local 1935 submatrix (same for all local rows) 1936 . d_nnz - array containing the number of block nonzeros in the various block rows 1937 of the in diagonal portion of the local (possibly different for each block 1938 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1939 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1940 submatrix (same for all local rows). 1941 - o_nnz - array containing the number of nonzeros in the various block rows of the 1942 off-diagonal portion of the local submatrix (possibly different for 1943 each block row) or PETSC_NULL. 1944 1945 Output Parameter: 1946 . A - the matrix 1947 1948 Options Database Keys: 1949 . -mat_no_unroll - uses code that does not unroll the loops in the 1950 block calculations (much slower) 1951 . -mat_block_size - size of the blocks to use 1952 . -mat_mpi - use the parallel matrix data structures even on one processor 1953 (defaults to using SeqBAIJ format on one processor) 1954 1955 Notes: 1956 The user MUST specify either the local or global matrix dimensions 1957 (possibly both). 1958 1959 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1960 than it must be used on all processors that share the object for that argument. 1961 1962 Storage Information: 1963 For a square global matrix we define each processor's diagonal portion 1964 to be its local rows and the corresponding columns (a square submatrix); 1965 each processor's off-diagonal portion encompasses the remainder of the 1966 local matrix (a rectangular submatrix). 1967 1968 The user can specify preallocated storage for the diagonal part of 1969 the local submatrix with either d_nz or d_nnz (not both). Set 1970 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1971 memory allocation. Likewise, specify preallocated storage for the 1972 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1973 1974 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1975 the figure below we depict these three local rows and all columns (0-11). 1976 1977 .vb 1978 0 1 2 3 4 5 6 7 8 9 10 11 1979 ------------------- 1980 row 3 | o o o d d d o o o o o o 1981 row 4 | o o o d d d o o o o o o 1982 row 5 | o o o d d d o o o o o o 1983 ------------------- 1984 .ve 1985 1986 Thus, any entries in the d locations are stored in the d (diagonal) 1987 submatrix, and any entries in the o locations are stored in the 1988 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1989 stored simply in the MATSEQBAIJ format for compressed row storage. 1990 1991 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1992 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1993 In general, for PDE problems in which most nonzeros are near the diagonal, 1994 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1995 or you will get TERRIBLE performance; see the users' manual chapter on 1996 matrices. 1997 1998 Level: intermediate 1999 2000 .keywords: matrix, block, aij, compressed row, sparse, parallel 2001 2002 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2003 @*/ 2004 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2005 { 2006 Mat B; 2007 Mat_MPIBAIJ *b; 2008 int ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 2009 PetscTruth flag1,flag2,flg; 2010 2011 PetscFunctionBegin; 2012 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2013 2014 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2015 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 2016 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 2017 if (d_nnz) { 2018 for (i=0; i<m/bs; i++) { 2019 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2020 } 2021 } 2022 if (o_nnz) { 2023 for (i=0; i<m/bs; i++) { 2024 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2025 } 2026 } 2027 2028 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2029 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2030 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2031 if (!flag1 && !flag2 && size == 1) { 2032 if (M == PETSC_DECIDE) M = m; 2033 if (N == PETSC_DECIDE) N = n; 2034 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 *A = 0; 2039 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2040 PLogObjectCreate(B); 2041 B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2042 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2043 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2044 2045 B->ops->destroy = MatDestroy_MPIBAIJ; 2046 B->ops->view = MatView_MPIBAIJ; 2047 B->mapping = 0; 2048 B->factor = 0; 2049 B->assembled = PETSC_FALSE; 2050 2051 B->insertmode = NOT_SET_VALUES; 2052 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2053 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2054 2055 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2056 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2057 } 2058 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2059 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2060 } 2061 if (N == PETSC_DECIDE && n == PETSC_DECIDE) { 2062 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2063 } 2064 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2065 if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2066 2067 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2068 work[0] = m; work[1] = n; 2069 mbs = m/bs; nbs = n/bs; 2070 ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2071 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2072 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2073 } 2074 if (m == PETSC_DECIDE) { 2075 Mbs = M/bs; 2076 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2077 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2078 m = mbs*bs; 2079 } 2080 if (n == PETSC_DECIDE) { 2081 Nbs = N/bs; 2082 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2083 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2084 n = nbs*bs; 2085 } 2086 if (mbs*bs != m || nbs*bs != n) { 2087 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2088 } 2089 2090 b->m = m; B->m = m; 2091 b->n = n; B->n = n; 2092 b->N = N; B->N = N; 2093 b->M = M; B->M = M; 2094 b->bs = bs; 2095 b->bs2 = bs*bs; 2096 b->mbs = mbs; 2097 b->nbs = nbs; 2098 b->Mbs = Mbs; 2099 b->Nbs = Nbs; 2100 2101 /* the information in the maps duplicates the information computed below, eventually 2102 we should remove the duplicate information that is not contained in the maps */ 2103 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2104 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2105 2106 /* build local table of row and column ownerships */ 2107 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2108 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2109 b->cowners = b->rowners + b->size + 2; 2110 b->rowners_bs = b->cowners + b->size + 2; 2111 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2112 b->rowners[0] = 0; 2113 for (i=2; i<=b->size; i++) { 2114 b->rowners[i] += b->rowners[i-1]; 2115 } 2116 for (i=0; i<=b->size; i++) { 2117 b->rowners_bs[i] = b->rowners[i]*bs; 2118 } 2119 b->rstart = b->rowners[b->rank]; 2120 b->rend = b->rowners[b->rank+1]; 2121 b->rstart_bs = b->rstart * bs; 2122 b->rend_bs = b->rend * bs; 2123 2124 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2125 b->cowners[0] = 0; 2126 for (i=2; i<=b->size; i++) { 2127 b->cowners[i] += b->cowners[i-1]; 2128 } 2129 b->cstart = b->cowners[b->rank]; 2130 b->cend = b->cowners[b->rank+1]; 2131 b->cstart_bs = b->cstart * bs; 2132 b->cend_bs = b->cend * bs; 2133 2134 2135 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2136 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2137 PLogObjectParent(B,b->A); 2138 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2139 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2140 PLogObjectParent(B,b->B); 2141 2142 /* build cache for off array entries formed */ 2143 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2144 ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2145 b->donotstash = PETSC_FALSE; 2146 b->colmap = PETSC_NULL; 2147 b->garray = PETSC_NULL; 2148 b->roworiented = PETSC_TRUE; 2149 2150 /* stuff used in block assembly */ 2151 b->barray = 0; 2152 2153 /* stuff used for matrix vector multiply */ 2154 b->lvec = 0; 2155 b->Mvctx = 0; 2156 2157 /* stuff for MatGetRow() */ 2158 b->rowindices = 0; 2159 b->rowvalues = 0; 2160 b->getrowactive = PETSC_FALSE; 2161 2162 /* hash table stuff */ 2163 b->ht = 0; 2164 b->hd = 0; 2165 b->ht_size = 0; 2166 b->ht_flag = PETSC_FALSE; 2167 b->ht_fact = 0; 2168 b->ht_total_ct = 0; 2169 b->ht_insert_ct = 0; 2170 2171 *A = B; 2172 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2173 if (flg) { 2174 double fact = 1.39; 2175 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2176 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2177 if (fact <= 1.0) fact = 1.39; 2178 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2179 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2180 } 2181 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2182 "MatStoreValues_MPIBAIJ", 2183 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2184 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2185 "MatRetrieveValues_MPIBAIJ", 2186 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2187 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2188 "MatGetDiagonalBlock_MPIBAIJ", 2189 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 2193 #undef __FUNC__ 2194 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 2195 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2196 { 2197 Mat mat; 2198 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2199 int ierr,len=0; 2200 PetscTruth flg; 2201 2202 PetscFunctionBegin; 2203 *newmat = 0; 2204 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2205 PLogObjectCreate(mat); 2206 mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2207 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2208 mat->ops->destroy = MatDestroy_MPIBAIJ; 2209 mat->ops->view = MatView_MPIBAIJ; 2210 mat->factor = matin->factor; 2211 mat->assembled = PETSC_TRUE; 2212 mat->insertmode = NOT_SET_VALUES; 2213 2214 a->m = mat->m = oldmat->m; 2215 a->n = mat->n = oldmat->n; 2216 a->M = mat->M = oldmat->M; 2217 a->N = mat->N = oldmat->N; 2218 2219 a->bs = oldmat->bs; 2220 a->bs2 = oldmat->bs2; 2221 a->mbs = oldmat->mbs; 2222 a->nbs = oldmat->nbs; 2223 a->Mbs = oldmat->Mbs; 2224 a->Nbs = oldmat->Nbs; 2225 2226 a->rstart = oldmat->rstart; 2227 a->rend = oldmat->rend; 2228 a->cstart = oldmat->cstart; 2229 a->cend = oldmat->cend; 2230 a->size = oldmat->size; 2231 a->rank = oldmat->rank; 2232 a->donotstash = oldmat->donotstash; 2233 a->roworiented = oldmat->roworiented; 2234 a->rowindices = 0; 2235 a->rowvalues = 0; 2236 a->getrowactive = PETSC_FALSE; 2237 a->barray = 0; 2238 a->rstart_bs = oldmat->rstart_bs; 2239 a->rend_bs = oldmat->rend_bs; 2240 a->cstart_bs = oldmat->cstart_bs; 2241 a->cend_bs = oldmat->cend_bs; 2242 2243 /* hash table stuff */ 2244 a->ht = 0; 2245 a->hd = 0; 2246 a->ht_size = 0; 2247 a->ht_flag = oldmat->ht_flag; 2248 a->ht_fact = oldmat->ht_fact; 2249 a->ht_total_ct = 0; 2250 a->ht_insert_ct = 0; 2251 2252 2253 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2254 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2255 a->cowners = a->rowners + a->size + 2; 2256 a->rowners_bs = a->cowners + a->size + 2; 2257 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2258 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2259 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2260 if (oldmat->colmap) { 2261 #if defined (PETSC_USE_CTABLE) 2262 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2263 #else 2264 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2265 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2266 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2267 #endif 2268 } else a->colmap = 0; 2269 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2270 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2271 PLogObjectMemory(mat,len*sizeof(int)); 2272 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2273 } else a->garray = 0; 2274 2275 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2276 PLogObjectParent(mat,a->lvec); 2277 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2278 2279 PLogObjectParent(mat,a->Mvctx); 2280 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2281 PLogObjectParent(mat,a->A); 2282 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2283 PLogObjectParent(mat,a->B); 2284 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2285 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2286 if (flg) { 2287 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2288 } 2289 *newmat = mat; 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #include "sys.h" 2294 2295 #undef __FUNC__ 2296 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 2297 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2298 { 2299 Mat A; 2300 int i,nz,ierr,j,rstart,rend,fd; 2301 Scalar *vals,*buf; 2302 MPI_Comm comm = ((PetscObject)viewer)->comm; 2303 MPI_Status status; 2304 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2305 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2306 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2307 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2308 int dcount,kmax,k,nzcount,tmp; 2309 2310 PetscFunctionBegin; 2311 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2312 2313 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2314 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2315 if (!rank) { 2316 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2317 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2318 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2319 if (header[3] < 0) { 2320 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2321 } 2322 } 2323 2324 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2325 M = header[1]; N = header[2]; 2326 2327 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2328 2329 /* 2330 This code adds extra rows to make sure the number of rows is 2331 divisible by the blocksize 2332 */ 2333 Mbs = M/bs; 2334 extra_rows = bs - M + bs*(Mbs); 2335 if (extra_rows == bs) extra_rows = 0; 2336 else Mbs++; 2337 if (extra_rows &&!rank) { 2338 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2339 } 2340 2341 /* determine ownership of all rows */ 2342 mbs = Mbs/size + ((Mbs % size) > rank); 2343 m = mbs * bs; 2344 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2345 browners = rowners + size + 1; 2346 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2347 rowners[0] = 0; 2348 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2349 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2350 rstart = rowners[rank]; 2351 rend = rowners[rank+1]; 2352 2353 /* distribute row lengths to all processors */ 2354 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2355 if (!rank) { 2356 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2357 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2358 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2359 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2360 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2361 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2362 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2363 } else { 2364 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2365 } 2366 2367 if (!rank) { 2368 /* calculate the number of nonzeros on each processor */ 2369 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2370 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2371 for (i=0; i<size; i++) { 2372 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2373 procsnz[i] += rowlengths[j]; 2374 } 2375 } 2376 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2377 2378 /* determine max buffer needed and allocate it */ 2379 maxnz = 0; 2380 for (i=0; i<size; i++) { 2381 maxnz = PetscMax(maxnz,procsnz[i]); 2382 } 2383 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2384 2385 /* read in my part of the matrix column indices */ 2386 nz = procsnz[0]; 2387 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2388 mycols = ibuf; 2389 if (size == 1) nz -= extra_rows; 2390 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2391 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2392 2393 /* read in every ones (except the last) and ship off */ 2394 for (i=1; i<size-1; i++) { 2395 nz = procsnz[i]; 2396 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2397 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2398 } 2399 /* read in the stuff for the last proc */ 2400 if (size != 1) { 2401 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2402 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2403 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2404 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2405 } 2406 ierr = PetscFree(cols);CHKERRQ(ierr); 2407 } else { 2408 /* determine buffer space needed for message */ 2409 nz = 0; 2410 for (i=0; i<m; i++) { 2411 nz += locrowlens[i]; 2412 } 2413 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2414 mycols = ibuf; 2415 /* receive message of column indices*/ 2416 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2417 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2418 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2419 } 2420 2421 /* loop over local rows, determining number of off diagonal entries */ 2422 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2423 odlens = dlens + (rend-rstart); 2424 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2425 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2426 masked1 = mask + Mbs; 2427 masked2 = masked1 + Mbs; 2428 rowcount = 0; nzcount = 0; 2429 for (i=0; i<mbs; i++) { 2430 dcount = 0; 2431 odcount = 0; 2432 for (j=0; j<bs; j++) { 2433 kmax = locrowlens[rowcount]; 2434 for (k=0; k<kmax; k++) { 2435 tmp = mycols[nzcount++]/bs; 2436 if (!mask[tmp]) { 2437 mask[tmp] = 1; 2438 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2439 else masked1[dcount++] = tmp; 2440 } 2441 } 2442 rowcount++; 2443 } 2444 2445 dlens[i] = dcount; 2446 odlens[i] = odcount; 2447 2448 /* zero out the mask elements we set */ 2449 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2450 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2451 } 2452 2453 /* create our matrix */ 2454 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2455 A = *newmat; 2456 MatSetOption(A,MAT_COLUMNS_SORTED); 2457 2458 if (!rank) { 2459 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2460 /* read in my part of the matrix numerical values */ 2461 nz = procsnz[0]; 2462 vals = buf; 2463 mycols = ibuf; 2464 if (size == 1) nz -= extra_rows; 2465 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2466 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2467 2468 /* insert into matrix */ 2469 jj = rstart*bs; 2470 for (i=0; i<m; i++) { 2471 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2472 mycols += locrowlens[i]; 2473 vals += locrowlens[i]; 2474 jj++; 2475 } 2476 /* read in other processors (except the last one) and ship out */ 2477 for (i=1; i<size-1; i++) { 2478 nz = procsnz[i]; 2479 vals = buf; 2480 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2481 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2482 } 2483 /* the last proc */ 2484 if (size != 1){ 2485 nz = procsnz[i] - extra_rows; 2486 vals = buf; 2487 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2488 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2489 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2490 } 2491 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2492 } else { 2493 /* receive numeric values */ 2494 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2495 2496 /* receive message of values*/ 2497 vals = buf; 2498 mycols = ibuf; 2499 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2500 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2501 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2502 2503 /* insert into matrix */ 2504 jj = rstart*bs; 2505 for (i=0; i<m; i++) { 2506 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2507 mycols += locrowlens[i]; 2508 vals += locrowlens[i]; 2509 jj++; 2510 } 2511 } 2512 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2513 ierr = PetscFree(buf);CHKERRQ(ierr); 2514 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2515 ierr = PetscFree(rowners);CHKERRQ(ierr); 2516 ierr = PetscFree(dlens);CHKERRQ(ierr); 2517 ierr = PetscFree(mask);CHKERRQ(ierr); 2518 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2519 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 #undef __FUNC__ 2524 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2525 /*@ 2526 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2527 2528 Input Parameters: 2529 . mat - the matrix 2530 . fact - factor 2531 2532 Collective on Mat 2533 2534 Level: advanced 2535 2536 Notes: 2537 This can also be set by the command line option: -mat_use_hash_table fact 2538 2539 .keywords: matrix, hashtable, factor, HT 2540 2541 .seealso: MatSetOption() 2542 @*/ 2543 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2544 { 2545 Mat_MPIBAIJ *baij; 2546 2547 PetscFunctionBegin; 2548 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2549 if (mat->type != MATMPIBAIJ) { 2550 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2551 } 2552 baij = (Mat_MPIBAIJ*)mat->data; 2553 baij->ht_fact = fact; 2554 PetscFunctionReturn(0); 2555 } 2556