/*$Id: mpibaij.c,v 1.186 2000/01/11 21:00:57 bsmith Exp bsmith $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "mat.h" I*/
#include "src/vec/vecimpl.h"

/* Routines implemented elsewhere in the BAIJ code that this file calls */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/*
   Saves the current numerical values of both local submatrices:
   the diagonal block (baij->A) and the off-diagonal block (baij->B).
   The values can later be restored with MatRetrieveValues().
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
/*
   Restores the numerical values of both local submatrices previously
   saved with MatStoreValues_MPIBAIJ().
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
/*
   Builds baij->colmap: for each global block column present in the
   off-diagonal part B, stores (local block index)*bs + 1 (so 0 means
   "not present").  Either a PetscTable (scalable) or a dense array of
   length Nbs (non-scalable) is used, depending on PETSC_USE_CTABLE.
*/
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* keys and values are stored shifted by +1 so that 0 can mean "absent" */
  ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* number of extra block entries allocated each time a row overflows */
#define CHUNKSIZE 10

/*
   Inlined insertion of a single scalar entry (row,col are point indices
   local to the diagonal block A) with either INSERT_VALUES or ADD_VALUES
   semantics.  A binary search narrows the block row, then a short linear
   scan finds the block column.  If the entry's block is missing and
   insertion is allowed (a->nonew), the row is enlarged by CHUNKSIZE
   blocks, reallocating the whole storage.  The caller must declare the
   work variables (rp,ap,rmax,nrow,low,high,t,_i,ii,N,brow,bcol,ridx,
   cidx,bap,ierr) and the aliases aa,ai,aj,aimax,ailen,a,A,bs,bs2.
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: values, column indices and row offsets in one allocation */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr);\
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

/* identical to MatSetValues_SeqBAIJ_A_Private() but operating on the
   off-diagonal block B (aliases ba,bi,bj,bimax,bilen,b,B) */
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   Inserts/adds scalar entries given by GLOBAL point (not block) indices.
   Locally owned entries go straight into A (diagonal block) or B
   (off-diagonal block, via colmap); off-process entries are stashed for
   communication during assembly.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative rows are ignored by convention */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >=
baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 265 #endif 266 if (im[i] >= rstart_orig && im[i] < rend_orig) { 267 row = im[i] - rstart_orig; 268 for (j=0; j<n; j++) { 269 if (in[j] >= cstart_orig && in[j] < cend_orig){ 270 col = in[j] - cstart_orig; 271 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 272 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 273 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 274 } else if (in[j] < 0) continue; 275 #if defined(PETSC_USE_BOPT_g) 276 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 277 #endif 278 else { 279 if (mat->was_assembled) { 280 if (!baij->colmap) { 281 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 282 } 283 #if defined (PETSC_USE_CTABLE) 284 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 285 col = col - 1 + in[j]%bs; 286 #else 287 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 290 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ*)(B)->data; 295 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 296 ba=b->a; 297 } 298 } else col = in[j]; 299 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 300 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 301 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 302 } 303 } 304 } else { 305 if (!baij->donotstash) { 306 if (roworiented) { 307 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 308 } else { 309 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 310 } 311 } 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNC__ 318 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 319 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int 
*in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* barray is a persistent bs*bs scratch block, allocated once and
     cached on the matrix */
  if(!barray) {
    baij->barray = barray = (Scalar*)PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  /* distance (in Scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required: gather the (i,j) block into barray */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* off-diagonal block column: translate global block column to
             local index in B via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr =
PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 383 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 384 } 385 #else 386 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 387 #endif 388 #endif 389 #if defined (PETSC_USE_CTABLE) 390 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 391 col = (col - 1)/bs; 392 #else 393 col = (baij->colmap[in[j]] - 1)/bs; 394 #endif 395 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 396 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 397 col = in[j]; 398 } 399 } 400 else col = in[j]; 401 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 402 } 403 } 404 } else { 405 if (!baij->donotstash) { 406 if (roworiented) { 407 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 408 } else { 409 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 410 } 411 } 412 } 413 } 414 PetscFunctionReturn(0); 415 } 416 #define HASH_KEY 0.6180339887 417 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 418 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 419 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 420 #undef __FUNC__ 421 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 422 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 423 { 424 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 425 int ierr,i,j,row,col; 426 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 427 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 428 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 429 PetscReal tmp; 430 Scalar ** HD = baij->hd,value; 431 #if defined(PETSC_USE_BOPT_g) 432 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 433 #endif 434 435 PetscFunctionBegin; 436 437 for (i=0; i<m; i++) { 438 #if defined(PETSC_USE_BOPT_g) 439 if (im[i] < 0) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: keys are block (row,col) + 1 */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probing: scan forward from h1, wrapping to the start */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-process entries are stashed exactly as in MatSetValues_MPIBAIJ */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValuesBlocked: im/in are GLOBAL block
   indices; the table maps each existing block to the address of its
   bs2 values.  New nonzero blocks cannot be added.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int
ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* distance (in Scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        /* linear probing with wrap-around, counting probes for statistics */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* stored blocks are column-major: element (ii,jj) lives at jj*bs+ii */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs;
ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          /* column-oriented input: blocks can be copied row of block at a time */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-process block row: stash in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   Retrieves entries by GLOBAL point indices.  Only locally owned rows are
   supported; entries in block columns not present in the off-diagonal part
   are returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 means the block column is absent; the garray check
             guards against a stale colmap entry */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   Computes a matrix norm.  Only NORM_FROBENIUS is supported in parallel
   (sum of squares of both local parts reduced over the communicator);
   a single-process matrix delegates to the sequential implementation.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
702 */ 703 #undef __FUNC__ 704 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 705 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 706 { 707 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 708 Mat A = baij->A,B=baij->B; 709 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 710 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 711 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 712 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 713 int *HT,key; 714 Scalar **HD; 715 PetscReal tmp; 716 #if defined(PETSC_USE_BOPT_g) 717 int ct=0,max=0; 718 #endif 719 720 PetscFunctionBegin; 721 baij->ht_size=(int)(factor*nz); 722 size = baij->ht_size; 723 724 if (baij->ht) { 725 PetscFunctionReturn(0); 726 } 727 728 /* Allocate Memory for Hash Table */ 729 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 730 baij->ht = (int*)(baij->hd + size); 731 HD = baij->hd; 732 HT = baij->ht; 733 734 735 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 736 737 738 /* Loop Over A */ 739 for (i=0; i<a->mbs; i++) { 740 for (j=ai[i]; j<ai[i+1]; j++) { 741 row = i+rstart; 742 col = aj[j]+cstart; 743 744 key = row*Nbs + col + 1; 745 h1 = HASH(size,key,tmp); 746 for (k=0; k<size; k++){ 747 if (HT[(h1+k)%size] == 0.0) { 748 HT[(h1+k)%size] = key; 749 HD[(h1+k)%size] = a->a + j*bs2; 750 break; 751 #if defined(PETSC_USE_BOPT_g) 752 } else { 753 ct++; 754 #endif 755 } 756 } 757 #if defined(PETSC_USE_BOPT_g) 758 if (k> max) max = k; 759 #endif 760 } 761 } 762 /* Loop Over B */ 763 for (i=0; i<b->mbs; i++) { 764 for (j=bi[i]; j<bi[i+1]; j++) { 765 row = i+rstart; 766 col = garray[bj[j]]; 767 key = row*Nbs + col + 1; 768 h1 = HASH(size,key,tmp); 769 for (k=0; k<size; k++){ 770 if (HT[(h1+k)%size] == 0.0) { 771 HT[(h1+k)%size] = key; 772 HD[(h1+k)%size] = b->a + j*bs2; 773 break; 774 #if defined(PETSC_USE_BOPT_g) 775 } else { 776 ct++; 777 #endif 778 } 779 } 780 #if 
defined(PETSC_USE_BOPT_g) 781 if (k> max) max = k; 782 #endif 783 } 784 } 785 786 /* Print Summary */ 787 #if defined(PETSC_USE_BOPT_g) 788 for (i=0,j=0; i<size; i++) { 789 if (HT[i]) {j++;} 790 } 791 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 792 #endif 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNC__ 797 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 798 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 799 { 800 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 801 int ierr,nstash,reallocs; 802 InsertMode addv; 803 804 PetscFunctionBegin; 805 if (baij->donotstash) { 806 PetscFunctionReturn(0); 807 } 808 809 /* make sure all processors are either in INSERTMODE or ADDMODE */ 810 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 811 if (addv == (ADD_VALUES|INSERT_VALUES)) { 812 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 813 } 814 mat->insertmode = addv; /* in case this processor had no cache */ 815 816 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 817 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 818 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 819 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNC__ 826 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 827 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 828 { 829 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 830 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 831 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 832 int 
*row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the scalar stash: each message holds (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* product over all ranks: zero iff at least one rank was not assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once the final pattern is known, optionally switch to the faster
     hash-table based SetValues routines */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* the MatGetRow() work array is invalidated by assembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/*
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
*/

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Viewer backend for ASCII/draw/socket viewers.  Info formats print local
   statistics; otherwise the whole matrix is gathered onto process 0 and
   viewed there (all processes participate for graphics synchronization).
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij =
(Mat_MPIBAIJ*)mat->data;
  int         ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth  isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-process statistics; nz counts are per block, hence scaled by bs */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor.
*/ 984 Mat A; 985 Mat_SeqBAIJ *Aloc; 986 int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals; 987 int mbs = baij->mbs; 988 Scalar *a; 989 990 if (!rank) { 991 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 992 } else { 993 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 994 } 995 PLogObjectParent(mat,A); 996 997 /* copy over the A part */ 998 Aloc = (Mat_SeqBAIJ*)baij->A->data; 999 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1000 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1001 1002 for (i=0; i<mbs; i++) { 1003 rvals[0] = bs*(baij->rstart + i); 1004 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1005 for (j=ai[i]; j<ai[i+1]; j++) { 1006 col = (baij->cstart+aj[j])*bs; 1007 for (k=0; k<bs; k++) { 1008 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1009 col++; a += bs; 1010 } 1011 } 1012 } 1013 /* copy over the B part */ 1014 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1015 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1016 for (i=0; i<mbs; i++) { 1017 rvals[0] = bs*(baij->rstart + i); 1018 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1019 for (j=ai[i]; j<ai[i+1]; j++) { 1020 col = baij->garray[aj[j]]*bs; 1021 for (k=0; k<bs; k++) { 1022 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1023 col++; a += bs; 1024 } 1025 } 1026 } 1027 ierr = PetscFree(rvals);CHKERRQ(ierr); 1028 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1029 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1030 /* 1031 Everyone has to call to draw the matrix since the graphics waits are 1032 synchronized across all processors that share the Draw object 1033 */ 1034 if (!rank) { 1035 Viewer sviewer; 1036 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1037 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1038 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1039 } 1040 ierr = 
MatDestroy(A);CHKERRQ(ierr); 1041 } 1042 PetscFunctionReturn(0); 1043 } 1044 1045 1046 1047 #undef __FUNC__ 1048 #define __FUNC__ "MatView_MPIBAIJ" 1049 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1050 { 1051 int ierr; 1052 PetscTruth isascii,isdraw,issocket,isbinary; 1053 1054 PetscFunctionBegin; 1055 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1056 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1057 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1058 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1059 if (isascii || isdraw || issocket || isbinary) { 1060 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1061 } else { 1062 SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1063 } 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNC__ 1068 #define __FUNC__ "MatDestroy_MPIBAIJ" 1069 int MatDestroy_MPIBAIJ(Mat mat) 1070 { 1071 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1072 int ierr; 1073 1074 PetscFunctionBegin; 1075 1076 if (mat->mapping) { 1077 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1078 } 1079 if (mat->bmapping) { 1080 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1081 } 1082 if (mat->rmap) { 1083 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1084 } 1085 if (mat->cmap) { 1086 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1087 } 1088 #if defined(PETSC_USE_LOG) 1089 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 1090 #endif 1091 1092 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1093 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1094 1095 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1096 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1097 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1098 #if defined (PETSC_USE_CTABLE) 1099 if (baij->colmap) {ierr = 
PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1100 #else 1101 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1102 #endif 1103 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1104 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1105 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1106 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1107 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1108 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1109 ierr = PetscFree(baij);CHKERRQ(ierr); 1110 PLogObjectDestroy(mat); 1111 PetscHeaderDestroy(mat); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNC__ 1116 #define __FUNC__ "MatMult_MPIBAIJ" 1117 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1118 { 1119 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1120 int ierr,nt; 1121 1122 PetscFunctionBegin; 1123 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1124 if (nt != a->n) { 1125 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1126 } 1127 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1128 if (nt != a->m) { 1129 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1130 } 1131 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1132 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1133 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1134 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1135 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1136 PetscFunctionReturn(0); 1137 } 1138 1139 #undef __FUNC__ 1140 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1141 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1142 { 1143 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1144 int ierr; 1145 1146 PetscFunctionBegin; 1147 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1148 ierr = 
(*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1149 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1150 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNC__ 1155 #define __FUNC__ "MatMultTranspose_MPIBAIJ" 1156 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1157 { 1158 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1159 int ierr; 1160 1161 PetscFunctionBegin; 1162 /* do nondiagonal part */ 1163 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1164 /* send it on its way */ 1165 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* receive remote parts: note this assumes the values are not actually */ 1169 /* inserted in yy until the next line, which is true for my implementation*/ 1170 /* but is not perhaps always true. */ 1171 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNC__ 1176 #define __FUNC__ "MatMultTransposeAdd_MPIBAIJ" 1177 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1178 { 1179 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1180 int ierr; 1181 1182 PetscFunctionBegin; 1183 /* do nondiagonal part */ 1184 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1185 /* send it on its way */ 1186 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1187 /* do local part */ 1188 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1189 /* receive remote parts: note this assumes the values are not actually */ 1190 /* inserted in yy until the next line, which is true for my implementation*/ 1191 /* but is not perhaps always true. 
*/ 1192 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 /* 1197 This only works correctly for square matrices where the subblock A->A is the 1198 diagonal block 1199 */ 1200 #undef __FUNC__ 1201 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1202 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1203 { 1204 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1205 int ierr; 1206 1207 PetscFunctionBegin; 1208 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1209 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNC__ 1214 #define __FUNC__ "MatScale_MPIBAIJ" 1215 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1216 { 1217 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1218 int ierr; 1219 1220 PetscFunctionBegin; 1221 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1222 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNC__ 1227 #define __FUNC__ "MatGetSize_MPIBAIJ" 1228 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1229 { 1230 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1231 1232 PetscFunctionBegin; 1233 if (m) *m = mat->M; 1234 if (n) *n = mat->N; 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNC__ 1239 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1240 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1241 { 1242 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1243 1244 PetscFunctionBegin; 1245 *m = mat->m; *n = mat->n; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNC__ 1250 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1251 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1252 { 1253 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1254 1255 PetscFunctionBegin; 1256 if (m) *m = mat->rstart*mat->bs; 1257 if (n) *n = mat->rend*mat->bs; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNC__ 1262 #define __FUNC__ "MatGetRow_MPIBAIJ" 1263 int 
MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1264 { 1265 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1266 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1267 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1268 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1269 int *cmap,*idx_p,cstart = mat->cstart; 1270 1271 PetscFunctionBegin; 1272 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1273 mat->getrowactive = PETSC_TRUE; 1274 1275 if (!mat->rowvalues && (idx || v)) { 1276 /* 1277 allocate enough space to hold information from the longest row. 1278 */ 1279 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1280 int max = 1,mbs = mat->mbs,tmp; 1281 for (i=0; i<mbs; i++) { 1282 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1283 if (max < tmp) { max = tmp; } 1284 } 1285 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1286 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1287 } 1288 1289 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1290 lrow = row - brstart; 1291 1292 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1293 if (!v) {pvA = 0; pvB = 0;} 1294 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1295 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1296 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1297 nztot = nzA + nzB; 1298 1299 cmap = mat->garray; 1300 if (v || idx) { 1301 if (nztot) { 1302 /* Sort by increasing column numbers, assuming A and B already sorted */ 1303 int imark = -1; 1304 if (v) { 1305 *v = v_p = mat->rowvalues; 1306 for (i=0; i<nzB; i++) { 1307 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1308 else break; 1309 } 1310 imark = i; 1311 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1312 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1313 } 1314 if (idx) { 1315 *idx = 
idx_p = mat->rowindices; 1316 if (imark > -1) { 1317 for (i=0; i<imark; i++) { 1318 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1319 } 1320 } else { 1321 for (i=0; i<nzB; i++) { 1322 if (cmap[cworkB[i]/bs] < cstart) 1323 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1324 else break; 1325 } 1326 imark = i; 1327 } 1328 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1329 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1330 } 1331 } else { 1332 if (idx) *idx = 0; 1333 if (v) *v = 0; 1334 } 1335 } 1336 *nz = nztot; 1337 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1338 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNC__ 1343 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1344 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1345 { 1346 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1347 1348 PetscFunctionBegin; 1349 if (baij->getrowactive == PETSC_FALSE) { 1350 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1351 } 1352 baij->getrowactive = PETSC_FALSE; 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNC__ 1357 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1358 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1359 { 1360 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1361 1362 PetscFunctionBegin; 1363 *bs = baij->bs; 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNC__ 1368 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1369 int MatZeroEntries_MPIBAIJ(Mat A) 1370 { 1371 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1372 int ierr; 1373 1374 PetscFunctionBegin; 1375 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1376 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNC__ 1381 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1382 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1383 { 1384 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*)matin->data; 1385 Mat A = a->A,B = a->B; 1386 int ierr; 1387 PetscReal isend[5],irecv[5]; 1388 1389 PetscFunctionBegin; 1390 info->block_size = (double)a->bs; 1391 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1392 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1393 isend[3] = info->memory; isend[4] = info->mallocs; 1394 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1395 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1396 isend[3] += info->memory; isend[4] += info->mallocs; 1397 if (flag == MAT_LOCAL) { 1398 info->nz_used = isend[0]; 1399 info->nz_allocated = isend[1]; 1400 info->nz_unneeded = isend[2]; 1401 info->memory = isend[3]; 1402 info->mallocs = isend[4]; 1403 } else if (flag == MAT_GLOBAL_MAX) { 1404 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1405 info->nz_used = irecv[0]; 1406 info->nz_allocated = irecv[1]; 1407 info->nz_unneeded = irecv[2]; 1408 info->memory = irecv[3]; 1409 info->mallocs = irecv[4]; 1410 } else if (flag == MAT_GLOBAL_SUM) { 1411 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1412 info->nz_used = irecv[0]; 1413 info->nz_allocated = irecv[1]; 1414 info->nz_unneeded = irecv[2]; 1415 info->memory = irecv[3]; 1416 info->mallocs = irecv[4]; 1417 } else { 1418 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1419 } 1420 info->rows_global = (double)a->M; 1421 info->columns_global = (double)a->N; 1422 info->rows_local = (double)a->m; 1423 info->columns_local = (double)a->N; 1424 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1425 info->fill_ratio_needed = 0; 1426 info->factor_mallocs = 0; 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNC__ 1431 #define __FUNC__ "MatSetOption_MPIBAIJ" 1432 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1433 { 1434 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1435 int ierr; 1436 1437 PetscFunctionBegin; 1438 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1439 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1440 op == MAT_COLUMNS_UNSORTED || 1441 op == MAT_COLUMNS_SORTED || 1442 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1443 op == MAT_KEEP_ZEROED_ROWS || 1444 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1445 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1446 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1447 } else if (op == MAT_ROW_ORIENTED) { 1448 a->roworiented = PETSC_TRUE; 1449 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1450 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1451 } else if (op == MAT_ROWS_SORTED || 1452 op == MAT_ROWS_UNSORTED || 1453 op == MAT_SYMMETRIC || 1454 op == MAT_STRUCTURALLY_SYMMETRIC || 1455 op == MAT_YES_NEW_DIAGONALS || 1456 op == MAT_USE_HASH_TABLE) { 1457 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1458 } else if (op == MAT_COLUMN_ORIENTED) { 1459 a->roworiented = PETSC_FALSE; 1460 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1461 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1462 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1463 a->donotstash = PETSC_TRUE; 1464 } else if (op == MAT_NO_NEW_DIAGONALS) { 1465 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1466 } else if (op == MAT_USE_HASH_TABLE) { 1467 a->ht_flag = PETSC_TRUE; 1468 } else { 1469 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1470 } 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNC__ 1475 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1476 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1477 { 1478 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1479 Mat_SeqBAIJ *Aloc; 1480 Mat B; 1481 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1482 int bs=baij->bs,mbs=baij->mbs; 1483 Scalar *a; 1484 1485 PetscFunctionBegin; 1486 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1487 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1488 1489 /* copy over the A part */ 1490 Aloc = (Mat_SeqBAIJ*)baij->A->data; 
1491 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1492 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1493 1494 for (i=0; i<mbs; i++) { 1495 rvals[0] = bs*(baij->rstart + i); 1496 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1497 for (j=ai[i]; j<ai[i+1]; j++) { 1498 col = (baij->cstart+aj[j])*bs; 1499 for (k=0; k<bs; k++) { 1500 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1501 col++; a += bs; 1502 } 1503 } 1504 } 1505 /* copy over the B part */ 1506 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1507 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1508 for (i=0; i<mbs; i++) { 1509 rvals[0] = bs*(baij->rstart + i); 1510 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1511 for (j=ai[i]; j<ai[i+1]; j++) { 1512 col = baij->garray[aj[j]]*bs; 1513 for (k=0; k<bs; k++) { 1514 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1515 col++; a += bs; 1516 } 1517 } 1518 } 1519 ierr = PetscFree(rvals);CHKERRQ(ierr); 1520 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1521 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1522 1523 if (matout) { 1524 *matout = B; 1525 } else { 1526 PetscOps *Abops; 1527 MatOps Aops; 1528 1529 /* This isn't really an in-place transpose .... but free data structures from baij */ 1530 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1531 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1532 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1533 #if defined (PETSC_USE_CTABLE) 1534 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1535 #else 1536 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1537 #endif 1538 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1539 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1540 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1541 ierr = PetscFree(baij);CHKERRQ(ierr); 1542 1543 /* 1544 This is horrible, horrible code. 
We need to keep the 1545 A pointers for the bops and ops but copy everything 1546 else from C. 1547 */ 1548 Abops = A->bops; 1549 Aops = A->ops; 1550 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1551 A->bops = Abops; 1552 A->ops = Aops; 1553 1554 PetscHeaderDestroy(B); 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNC__ 1560 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1561 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1562 { 1563 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1564 Mat a = baij->A,b = baij->B; 1565 int ierr,s1,s2,s3; 1566 1567 PetscFunctionBegin; 1568 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1569 if (rr) { 1570 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1571 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1572 /* Overlap communication with computation. */ 1573 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1574 } 1575 if (ll) { 1576 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1577 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1578 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1579 } 1580 /* scale the diagonal block */ 1581 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1582 1583 if (rr) { 1584 /* Do a scatter end and then right scale the off-diagonal block */ 1585 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1586 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1587 } 1588 1589 PetscFunctionReturn(0); 1590 } 1591 1592 #undef __FUNC__ 1593 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1594 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1595 { 1596 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1597 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1598 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1599 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1600 int *rvalues,tag = 
A->tag,count,base,slen,n,*source; 1601 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1602 MPI_Comm comm = A->comm; 1603 MPI_Request *send_waits,*recv_waits; 1604 MPI_Status recv_status,*send_status; 1605 IS istmp; 1606 1607 PetscFunctionBegin; 1608 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1609 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1610 1611 /* first count number of contributors to each processor */ 1612 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1613 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1614 procs = nprocs + size; 1615 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1616 for (i=0; i<N; i++) { 1617 idx = rows[i]; 1618 found = 0; 1619 for (j=0; j<size; j++) { 1620 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1621 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1622 } 1623 } 1624 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1625 } 1626 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1627 1628 /* inform other processors of number of messages and max length*/ 1629 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1630 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1631 nmax = work[rank]; 1632 nrecvs = work[size+rank]; 1633 ierr = PetscFree(work);CHKERRQ(ierr); 1634 1635 /* post receives: */ 1636 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1637 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1638 for (i=0; i<nrecvs; i++) { 1639 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1640 } 1641 1642 /* do sends: 1643 1) starts[i] gives the starting index in svalues for stuff going to 1644 the ith processor 1645 */ 1646 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1647 send_waits = 
(MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1648 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1649 starts[0] = 0; 1650 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1651 for (i=0; i<N; i++) { 1652 svalues[starts[owner[i]]++] = rows[i]; 1653 } 1654 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1655 1656 starts[0] = 0; 1657 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1658 count = 0; 1659 for (i=0; i<size; i++) { 1660 if (procs[i]) { 1661 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1662 } 1663 } 1664 ierr = PetscFree(starts);CHKERRQ(ierr); 1665 1666 base = owners[rank]*bs; 1667 1668 /* wait on receives */ 1669 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1670 source = lens + nrecvs; 1671 count = nrecvs; slen = 0; 1672 while (count) { 1673 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1674 /* unpack receives into our local space */ 1675 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1676 source[imdex] = recv_status.MPI_SOURCE; 1677 lens[imdex] = n; 1678 slen += n; 1679 count--; 1680 } 1681 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1682 1683 /* move the data into the send scatter */ 1684 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1685 count = 0; 1686 for (i=0; i<nrecvs; i++) { 1687 values = rvalues + i*nmax; 1688 for (j=0; j<lens[i]; j++) { 1689 lrows[count++] = values[j] - base; 1690 } 1691 } 1692 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1693 ierr = PetscFree(lens);CHKERRQ(ierr); 1694 ierr = PetscFree(owner);CHKERRQ(ierr); 1695 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1696 1697 /* actually zap the local rows */ 1698 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1699 PLogObjectParent(A,istmp); 1700 1701 /* 1702 Zero the required rows. 
If the "diagonal block" of the matrix 1703 is square and the user wishes to set the diagonal we use seperate 1704 code so that MatSetValues() is not called for each diagonal allocating 1705 new memory, thus calling lots of mallocs and slowing things down. 1706 1707 Contributed by: Mathew Knepley 1708 */ 1709 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1710 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 1711 if (diag && (l->A->M == l->A->N)) { 1712 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 1713 } else if (diag) { 1714 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1715 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1716 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1717 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1718 } 1719 for (i = 0; i < slen; i++) { 1720 row = lrows[i] + rstart_bs; 1721 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1722 } 1723 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1725 } else { 1726 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1727 } 1728 1729 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1730 ierr = PetscFree(lrows);CHKERRQ(ierr); 1731 1732 /* wait on sends */ 1733 if (nsends) { 1734 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1735 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1736 ierr = PetscFree(send_status);CHKERRQ(ierr); 1737 } 1738 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1739 ierr = PetscFree(svalues);CHKERRQ(ierr); 1740 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNC__ 1745 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1746 int MatPrintHelp_MPIBAIJ(Mat A) 1747 { 1748 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1749 MPI_Comm comm = A->comm; 1750 static int called = 0; 1751 int ierr; 1752 1753 PetscFunctionBegin; 
1754 if (!a->rank) { 1755 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1756 } 1757 if (called) {PetscFunctionReturn(0);} else called = 1; 1758 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1759 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #undef __FUNC__ 1764 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1765 int MatSetUnfactored_MPIBAIJ(Mat A) 1766 { 1767 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1768 int ierr; 1769 1770 PetscFunctionBegin; 1771 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1772 PetscFunctionReturn(0); 1773 } 1774 1775 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1776 1777 #undef __FUNC__ 1778 #define __FUNC__ "MatEqual_MPIBAIJ" 1779 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1780 { 1781 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1782 Mat a,b,c,d; 1783 PetscTruth flg; 1784 int ierr; 1785 1786 PetscFunctionBegin; 1787 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1788 a = matA->A; b = matA->B; 1789 c = matB->A; d = matB->B; 1790 1791 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1792 if (flg == PETSC_TRUE) { 1793 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1794 } 1795 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 /* -------------------------------------------------------------------*/ 1800 static struct _MatOps MatOps_Values = { 1801 MatSetValues_MPIBAIJ, 1802 MatGetRow_MPIBAIJ, 1803 MatRestoreRow_MPIBAIJ, 1804 MatMult_MPIBAIJ, 1805 MatMultAdd_MPIBAIJ, 1806 MatMultTranspose_MPIBAIJ, 1807 MatMultTransposeAdd_MPIBAIJ, 1808 0, 1809 0, 1810 0, 1811 0, 1812 0, 1813 0, 1814 0, 1815 MatTranspose_MPIBAIJ, 1816 MatGetInfo_MPIBAIJ, 1817 MatEqual_MPIBAIJ, 1818 MatGetDiagonal_MPIBAIJ, 1819 MatDiagonalScale_MPIBAIJ, 
1820 MatNorm_MPIBAIJ, 1821 MatAssemblyBegin_MPIBAIJ, 1822 MatAssemblyEnd_MPIBAIJ, 1823 0, 1824 MatSetOption_MPIBAIJ, 1825 MatZeroEntries_MPIBAIJ, 1826 MatZeroRows_MPIBAIJ, 1827 0, 1828 0, 1829 0, 1830 0, 1831 MatGetSize_MPIBAIJ, 1832 MatGetLocalSize_MPIBAIJ, 1833 MatGetOwnershipRange_MPIBAIJ, 1834 0, 1835 0, 1836 0, 1837 0, 1838 MatDuplicate_MPIBAIJ, 1839 0, 1840 0, 1841 0, 1842 0, 1843 0, 1844 MatGetSubMatrices_MPIBAIJ, 1845 MatIncreaseOverlap_MPIBAIJ, 1846 MatGetValues_MPIBAIJ, 1847 0, 1848 MatPrintHelp_MPIBAIJ, 1849 MatScale_MPIBAIJ, 1850 0, 1851 0, 1852 0, 1853 MatGetBlockSize_MPIBAIJ, 1854 0, 1855 0, 1856 0, 1857 0, 1858 0, 1859 0, 1860 MatSetUnfactored_MPIBAIJ, 1861 0, 1862 MatSetValuesBlocked_MPIBAIJ, 1863 0, 1864 0, 1865 0, 1866 MatGetMaps_Petsc}; 1867 1868 1869 EXTERN_C_BEGIN 1870 #undef __FUNC__ 1871 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 1872 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1873 { 1874 PetscFunctionBegin; 1875 *a = ((Mat_MPIBAIJ *)A->data)->A; 1876 *iscopy = PETSC_FALSE; 1877 PetscFunctionReturn(0); 1878 } 1879 EXTERN_C_END 1880 1881 #undef __FUNC__ 1882 #define __FUNC__ "MatCreateMPIBAIJ" 1883 /*@C 1884 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1885 (block compressed row). For good matrix assembly performance 1886 the user should preallocate the matrix storage by setting the parameters 1887 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1888 performance can be increased by more than a factor of 50. 1889 1890 Collective on MPI_Comm 1891 1892 Input Parameters: 1893 + comm - MPI communicator 1894 . bs - size of blockk 1895 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1896 This value should be the same as the local size used in creating the 1897 y vector for the matrix-vector product y = Ax. 1898 . 
n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           of the diagonal portion of the local submatrix (possibly different for each block 
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor 
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o  o  o  d  d  d  o  o  o  o  o  o
   row 4  |  o  o  o  d  d  d  o  o  o  o  o  o
   row 5  |  o  o  o  d  d  d  o  o  o  o  o  o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
  PetscTruth  flag1,flag2,flg;

  PetscFunctionBegin;
  /* the block size may be overridden from the options database */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  /* sanity-check the preallocation arguments; -1/-2 are the PETSC_DECIDE/PETSC_DEFAULT sentinels */
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* on one processor (unless -mat_mpibaij/-mat_mpi is given) fall back to the SeqBAIJ format */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    =
PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPIBAIJ;
  B->ops->view    = MatView_MPIBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* local and global sizes: at least one of each pair (m,M) and (n,N) must be given */
  if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes win; recompute the global below */
  if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    /* sum the local sizes over all processes to obtain the global sizes */
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    /* split the global block rows as evenly as possible; the first (Mbs % size) ranks get one extra */
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; one allocation holds the
     three arrays rowners / cowners / rowners_bs, each of length size+2 */
  b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  /* gather every rank's block-row count, then prefix-sum to get ownership offsets */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;  /* same table in point (non-block) units */
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;

  /* create the local diagonal (m x n) and off-diagonal (m x N) sequential pieces */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  =
PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optional hash-table based assembly; the option value (if > 1.0) is the table-size factor */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;   /* reject nonsensical factors; fall back to the default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register type-specific operations that are looked up by name at runtime */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIBAIJ",
                                           (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIBAIJ",
                                           (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIBAIJ",
                                           (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDuplicate_MPIBAIJ"
/*
   Duplicates a parallel block AIJ matrix: copies all the size/ownership
   bookkeeping and (depending on cpvalues) the numerical values of the
   local diagonal and off-diagonal sequential pieces.
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;
  PetscTruth  flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct
_MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2174 PLogObjectCreate(mat); 2175 mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2176 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2177 mat->ops->destroy = MatDestroy_MPIBAIJ; 2178 mat->ops->view = MatView_MPIBAIJ; 2179 mat->factor = matin->factor; 2180 mat->assembled = PETSC_TRUE; 2181 mat->insertmode = NOT_SET_VALUES; 2182 2183 a->m = mat->m = oldmat->m; 2184 a->n = mat->n = oldmat->n; 2185 a->M = mat->M = oldmat->M; 2186 a->N = mat->N = oldmat->N; 2187 2188 a->bs = oldmat->bs; 2189 a->bs2 = oldmat->bs2; 2190 a->mbs = oldmat->mbs; 2191 a->nbs = oldmat->nbs; 2192 a->Mbs = oldmat->Mbs; 2193 a->Nbs = oldmat->Nbs; 2194 2195 a->rstart = oldmat->rstart; 2196 a->rend = oldmat->rend; 2197 a->cstart = oldmat->cstart; 2198 a->cend = oldmat->cend; 2199 a->size = oldmat->size; 2200 a->rank = oldmat->rank; 2201 a->donotstash = oldmat->donotstash; 2202 a->roworiented = oldmat->roworiented; 2203 a->rowindices = 0; 2204 a->rowvalues = 0; 2205 a->getrowactive = PETSC_FALSE; 2206 a->barray = 0; 2207 a->rstart_bs = oldmat->rstart_bs; 2208 a->rend_bs = oldmat->rend_bs; 2209 a->cstart_bs = oldmat->cstart_bs; 2210 a->cend_bs = oldmat->cend_bs; 2211 2212 /* hash table stuff */ 2213 a->ht = 0; 2214 a->hd = 0; 2215 a->ht_size = 0; 2216 a->ht_flag = oldmat->ht_flag; 2217 a->ht_fact = oldmat->ht_fact; 2218 a->ht_total_ct = 0; 2219 a->ht_insert_ct = 0; 2220 2221 2222 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2223 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2224 a->cowners = a->rowners + a->size + 2; 2225 a->rowners_bs = a->cowners + a->size + 2; 2226 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2227 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2228 ierr = 
MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2229 if (oldmat->colmap) { 2230 #if defined (PETSC_USE_CTABLE) 2231 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2232 #else 2233 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2234 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2235 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2236 #endif 2237 } else a->colmap = 0; 2238 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2239 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2240 PLogObjectMemory(mat,len*sizeof(int)); 2241 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2242 } else a->garray = 0; 2243 2244 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2245 PLogObjectParent(mat,a->lvec); 2246 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2247 2248 PLogObjectParent(mat,a->Mvctx); 2249 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2250 PLogObjectParent(mat,a->A); 2251 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2252 PLogObjectParent(mat,a->B); 2253 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2254 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2255 if (flg) { 2256 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2257 } 2258 *newmat = mat; 2259 PetscFunctionReturn(0); 2260 } 2261 2262 #include "sys.h" 2263 2264 #undef __FUNC__ 2265 #define __FUNC__ "MatLoad_MPIBAIJ" 2266 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2267 { 2268 Mat A; 2269 int i,nz,ierr,j,rstart,rend,fd; 2270 Scalar *vals,*buf; 2271 MPI_Comm comm = ((PetscObject)viewer)->comm; 2272 MPI_Status status; 2273 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2274 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2275 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2276 int 
*dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only rank 0 touches the file; the header is broadcast below */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /* 
     This code adds extra rows to make sure the number of rows is 
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);   /* rows of padding needed; bs means M was already divisible */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows: even split of block rows, first ranks get the remainder */
  mbs        = Mbs/size + ((Mbs % size) > rank);
  m          = mbs * bs;
  rowners    = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners   = rowners + size + 1;   /* same table in point (non-block) units */
  ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* padding rows each contain a single (unit diagonal) entry */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* with one process the padding rows are local; they are not on disk */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read every other processor's part (except the last) and ship it off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for
(i=0; i<extra_rows; i++) cols[nz+i] = M+i;   /* append padding-row diagonal columns */
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks block columns already seen in the current block row,
     masked1/masked2 record which entries to un-mark afterwards */
  dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;   /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* square matrix: block columns inside [rstart,rend) belong to the diagonal part */
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix, exactly preallocated from the counts computed above */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
    /* read in my part
of the matrix numerical values  */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* unit diagonal for the padding rows keeps the padded matrix nonsingular */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      nz   = procsnz[i] - extra_rows;   /* note: i == size-1 here, left over from the loop above */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr =
PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  /* the data pointer is only a Mat_MPIBAIJ for the MPIBAIJ type; reject anything else */
  if (mat->type != MATMPIBAIJ) {
    SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  baij = (Mat_MPIBAIJ*)mat->data;
  /* stored factor is used later (hash-table assembly) to size the table */
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}