1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.176 1999/09/14 18:35:25 bsmith Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqBAIJ(Mat); 18 19 EXTERN_C_BEGIN 20 #undef __FUNC__ 21 #define __FUNC__ "MatStoreValues_MPIBAIJ" 22 int MatStoreValues_MPIBAIJ(Mat mat) 23 { 24 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 25 int ierr; 26 27 PetscFunctionBegin; 28 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 29 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 30 PetscFunctionReturn(0); 31 } 32 EXTERN_C_END 33 34 EXTERN_C_BEGIN 35 #undef __FUNC__ 36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ" 37 int MatRetrieveValues_MPIBAIJ(Mat mat) 38 { 39 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 40 int ierr; 41 42 PetscFunctionBegin; 43 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 44 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 45 PetscFunctionReturn(0); 46 } 47 EXTERN_C_END 48 49 /* 50 Local utility routine that creates a mapping from the global column 51 number to the local number in the off-diagonal part of the local 52 storage of the matrix. This is done in a non scable way since the 53 length of colmap equals the global matrix length. 
54 */ 55 #undef __FUNC__ 56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 57 static int CreateColmap_MPIBAIJ_Private(Mat mat) 58 { 59 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 60 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 61 int nbs = B->nbs,i,bs=B->bs,ierr; 62 63 PetscFunctionBegin; 64 #if defined (PETSC_USE_CTABLE) 65 ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 66 for ( i=0; i<nbs; i++ ){ 67 ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 68 } 69 #else 70 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 71 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 72 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 73 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 74 #endif 75 PetscFunctionReturn(0); 76 } 77 78 #define CHUNKSIZE 10 79 80 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 81 { \ 82 \ 83 brow = row/bs; \ 84 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 85 rmax = aimax[brow]; nrow = ailen[brow]; \ 86 bcol = col/bs; \ 87 ridx = row % bs; cidx = col % bs; \ 88 low = 0; high = nrow; \ 89 while (high-low > 3) { \ 90 t = (low+high)/2; \ 91 if (rp[t] > bcol) high = t; \ 92 else low = t; \ 93 } \ 94 for ( _i=low; _i<high; _i++ ) { \ 95 if (rp[_i] > bcol) break; \ 96 if (rp[_i] == bcol) { \ 97 bap = ap + bs2*_i + bs*cidx + ridx; \ 98 if (addv == ADD_VALUES) *bap += value; \ 99 else *bap = value; \ 100 goto a_noinsert; \ 101 } \ 102 } \ 103 if (a->nonew == 1) goto a_noinsert; \ 104 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 105 if (nrow >= rmax) { \ 106 /* there is no extra room in row, therefore enlarge */ \ 107 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 108 Scalar *new_a; \ 109 \ 110 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 111 \ 112 /* malloc new storage space */ \ 113 len = 
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 114 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 115 new_j = (int *) (new_a + bs2*new_nz); \ 116 new_i = new_j + new_nz; \ 117 \ 118 /* copy over old data into new slots */ \ 119 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 120 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 121 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 122 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 123 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 124 len*sizeof(int));CHKERRQ(ierr); \ 125 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 126 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 127 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 128 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 129 /* free up old matrix storage */ \ 130 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 131 if (!a->singlemalloc) { \ 132 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 133 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 134 } \ 135 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 136 a->singlemalloc = 1; \ 137 \ 138 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 139 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 140 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 141 a->maxnz += bs2*CHUNKSIZE; \ 142 a->reallocs++; \ 143 a->nz++; \ 144 } \ 145 N = nrow++ - 1; \ 146 /* shift up all the later entries in this row */ \ 147 for ( ii=N; ii>=_i; ii-- ) { \ 148 rp[ii+1] = rp[ii]; \ 149 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 150 } \ 151 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \ 152 rp[_i] = bcol; \ 153 ap[bs2*_i + bs*cidx + ridx] = value; \ 154 a_noinsert:; \ 155 ailen[brow] = nrow; \ 156 } 157 158 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 159 { 
\ 160 \ 161 brow = row/bs; \ 162 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 163 rmax = bimax[brow]; nrow = bilen[brow]; \ 164 bcol = col/bs; \ 165 ridx = row % bs; cidx = col % bs; \ 166 low = 0; high = nrow; \ 167 while (high-low > 3) { \ 168 t = (low+high)/2; \ 169 if (rp[t] > bcol) high = t; \ 170 else low = t; \ 171 } \ 172 for ( _i=low; _i<high; _i++ ) { \ 173 if (rp[_i] > bcol) break; \ 174 if (rp[_i] == bcol) { \ 175 bap = ap + bs2*_i + bs*cidx + ridx; \ 176 if (addv == ADD_VALUES) *bap += value; \ 177 else *bap = value; \ 178 goto b_noinsert; \ 179 } \ 180 } \ 181 if (b->nonew == 1) goto b_noinsert; \ 182 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 183 if (nrow >= rmax) { \ 184 /* there is no extra room in row, therefore enlarge */ \ 185 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 186 Scalar *new_a; \ 187 \ 188 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 189 \ 190 /* malloc new storage space */ \ 191 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 192 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 193 new_j = (int *) (new_a + bs2*new_nz); \ 194 new_i = new_j + new_nz; \ 195 \ 196 /* copy over old data into new slots */ \ 197 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 198 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 199 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 200 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 201 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 202 len*sizeof(int));CHKERRQ(ierr); \ 203 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 204 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 205 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 206 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 207 
/* free up old matrix storage */ \ 208 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 209 if (!b->singlemalloc) { \ 210 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 211 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 212 } \ 213 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 214 b->singlemalloc = 1; \ 215 \ 216 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 217 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 218 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 219 b->maxnz += bs2*CHUNKSIZE; \ 220 b->reallocs++; \ 221 b->nz++; \ 222 } \ 223 N = nrow++ - 1; \ 224 /* shift up all the later entries in this row */ \ 225 for ( ii=N; ii>=_i; ii-- ) { \ 226 rp[ii+1] = rp[ii]; \ 227 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 228 } \ 229 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \ 230 rp[_i] = bcol; \ 231 ap[bs2*_i + bs*cidx + ridx] = value; \ 232 b_noinsert:; \ 233 bilen[brow] = nrow; \ 234 } 235 236 #undef __FUNC__ 237 #define __FUNC__ "MatSetValues_MPIBAIJ" 238 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 239 { 240 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 241 Scalar value; 242 int ierr,i,j,row,col; 243 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 244 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 245 int cend_orig=baij->cend_bs,bs=baij->bs; 246 247 /* Some Variables required in the macro */ 248 Mat A = baij->A; 249 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 250 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 251 Scalar *aa=a->a; 252 253 Mat B = baij->B; 254 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 255 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 256 Scalar *ba=b->a; 257 258 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 259 int low,high,t,ridx,cidx,bs2=a->bs2; 260 Scalar *ap,*bap; 261 262 PetscFunctionBegin; 263 for ( i=0; i<m; i++ ) { 264 if (im[i] < 0) continue; 265 #if defined(PETSC_USE_BOPT_g) 266 
if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 267 #endif 268 if (im[i] >= rstart_orig && im[i] < rend_orig) { 269 row = im[i] - rstart_orig; 270 for ( j=0; j<n; j++ ) { 271 if (in[j] >= cstart_orig && in[j] < cend_orig){ 272 col = in[j] - cstart_orig; 273 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 274 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 275 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 276 } else if (in[j] < 0) continue; 277 #if defined(PETSC_USE_BOPT_g) 278 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 279 #endif 280 else { 281 if (mat->was_assembled) { 282 if (!baij->colmap) { 283 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 284 } 285 #if defined (PETSC_USE_CTABLE) 286 ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr); 287 col = col - 1 + in[j]%bs; 288 #else 289 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 290 #endif 291 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 292 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 293 col = in[j]; 294 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 295 B = baij->B; 296 b = (Mat_SeqBAIJ *) (B)->data; 297 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 298 ba=b->a; 299 } 300 } else col = in[j]; 301 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 302 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 303 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 304 } 305 } 306 } else { 307 if ( !baij->donotstash) { 308 if (roworiented) { 309 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 310 } else { 311 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 312 } 313 } 314 } 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNC__ 320 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 321 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int 
*im,int n,int *in,Scalar *v,InsertMode addv) 322 { 323 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 324 Scalar *value,*barray=baij->barray; 325 int ierr,i,j,ii,jj,row,col; 326 int roworiented = baij->roworiented,rstart=baij->rstart ; 327 int rend=baij->rend,cstart=baij->cstart,stepval; 328 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 329 330 if(!barray) { 331 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray); 332 } 333 334 if (roworiented) { 335 stepval = (n-1)*bs; 336 } else { 337 stepval = (m-1)*bs; 338 } 339 for ( i=0; i<m; i++ ) { 340 if (im[i] < 0) continue; 341 #if defined(PETSC_USE_BOPT_g) 342 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 343 #endif 344 if (im[i] >= rstart && im[i] < rend) { 345 row = im[i] - rstart; 346 for ( j=0; j<n; j++ ) { 347 /* If NumCol = 1 then a copy is not required */ 348 if ((roworiented) && (n == 1)) { 349 barray = v + i*bs2; 350 } else if((!roworiented) && (m == 1)) { 351 barray = v + j*bs2; 352 } else { /* Here a copy is required */ 353 if (roworiented) { 354 value = v + i*(stepval+bs)*bs + j*bs; 355 } else { 356 value = v + j*(stepval+bs)*bs + i*bs; 357 } 358 for ( ii=0; ii<bs; ii++,value+=stepval ) { 359 for (jj=0; jj<bs; jj++ ) { 360 *barray++ = *value++; 361 } 362 } 363 barray -=bs2; 364 } 365 366 if (in[j] >= cstart && in[j] < cend){ 367 col = in[j] - cstart; 368 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 369 } 370 else if (in[j] < 0) continue; 371 #if defined(PETSC_USE_BOPT_g) 372 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 373 #endif 374 else { 375 if (mat->was_assembled) { 376 if (!baij->colmap) { 377 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 378 } 379 380 #if defined(PETSC_USE_BOPT_g) 381 #if defined (PETSC_USE_CTABLE) 382 { int data; 383 ierr = 
TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 384 if((data - 1) % bs) 385 {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 386 } 387 #else 388 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 389 #endif 390 #endif 391 #if defined (PETSC_USE_CTABLE) 392 ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 393 col = (col - 1)/bs; 394 #else 395 col = (baij->colmap[in[j]] - 1)/bs; 396 #endif 397 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 398 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 399 col = in[j]; 400 } 401 } 402 else col = in[j]; 403 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 404 } 405 } 406 } else { 407 if (!baij->donotstash) { 408 if (roworiented) { 409 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 410 } else { 411 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 412 } 413 } 414 } 415 } 416 PetscFunctionReturn(0); 417 } 418 #define HASH_KEY 0.6180339887 419 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 420 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 421 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 422 #undef __FUNC__ 423 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 424 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 425 { 426 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 427 int ierr,i,j,row,col; 428 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 429 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 430 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 431 double tmp; 432 Scalar ** HD = baij->hd,value; 433 #if defined(PETSC_USE_BOPT_g) 434 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 435 #endif 436 437 PetscFunctionBegin; 438 439 for ( i=0; i<m; i++ ) { 440 #if defined(PETSC_USE_BOPT_g) 441 if (im[i] < 0) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 442 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 443 #endif 444 row = im[i]; 445 if (row >= rstart_orig && row < rend_orig) { 446 for ( j=0; j<n; j++ ) { 447 col = in[j]; 448 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 449 /* Look up into the Hash Table */ 450 key = (row/bs)*Nbs+(col/bs)+1; 451 h1 = HASH(size,key,tmp); 452 453 454 idx = h1; 455 #if defined(PETSC_USE_BOPT_g) 456 insert_ct++; 457 total_ct++; 458 if (HT[idx] != key) { 459 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 460 if (idx == size) { 461 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 462 if (idx == h1) { 463 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 464 } 465 } 466 } 467 #else 468 if (HT[idx] != key) { 469 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 470 if (idx == size) { 471 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 472 if (idx == h1) { 473 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 474 } 475 } 476 } 477 #endif 478 /* A HASH table entry is found, so insert the values at the correct address */ 479 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 480 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 481 } 482 } else { 483 if (!baij->donotstash) { 484 if (roworiented) { 485 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 486 } else { 487 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 488 } 489 } 490 } 491 } 492 #if defined(PETSC_USE_BOPT_g) 493 baij->ht_total_ct = total_ct; 494 baij->ht_insert_ct = insert_ct; 495 #endif 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNC__ 500 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 501 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 502 { 503 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 504 int 
ierr,i,j,ii,jj,row,col; 505 int roworiented = baij->roworiented,rstart=baij->rstart ; 506 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 507 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 508 double tmp; 509 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 510 #if defined(PETSC_USE_BOPT_g) 511 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 512 #endif 513 514 PetscFunctionBegin; 515 516 if (roworiented) { 517 stepval = (n-1)*bs; 518 } else { 519 stepval = (m-1)*bs; 520 } 521 for ( i=0; i<m; i++ ) { 522 #if defined(PETSC_USE_BOPT_g) 523 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 524 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 525 #endif 526 row = im[i]; 527 v_t = v + i*bs2; 528 if (row >= rstart && row < rend) { 529 for ( j=0; j<n; j++ ) { 530 col = in[j]; 531 532 /* Look up into the Hash Table */ 533 key = row*Nbs+col+1; 534 h1 = HASH(size,key,tmp); 535 536 idx = h1; 537 #if defined(PETSC_USE_BOPT_g) 538 total_ct++; 539 insert_ct++; 540 if (HT[idx] != key) { 541 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 542 if (idx == size) { 543 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 544 if (idx == h1) { 545 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 546 } 547 } 548 } 549 #else 550 if (HT[idx] != key) { 551 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 552 if (idx == size) { 553 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 554 if (idx == h1) { 555 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 556 } 557 } 558 } 559 #endif 560 baij_a = HD[idx]; 561 if (roworiented) { 562 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 563 /* value = v + (i*(stepval+bs)+j)*bs; */ 564 value = v_t; 565 v_t += bs; 566 if (addv == ADD_VALUES) { 567 for ( ii=0; ii<bs; ii++,value+=stepval) { 568 for ( jj=ii; jj<bs2; jj+=bs ) { 569 baij_a[jj] += *value++; 570 } 571 } 572 } else { 573 for ( ii=0; ii<bs; 
ii++,value+=stepval) { 574 for ( jj=ii; jj<bs2; jj+=bs ) { 575 baij_a[jj] = *value++; 576 } 577 } 578 } 579 } else { 580 value = v + j*(stepval+bs)*bs + i*bs; 581 if (addv == ADD_VALUES) { 582 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 583 for ( jj=0; jj<bs; jj++ ) { 584 baij_a[jj] += *value++; 585 } 586 } 587 } else { 588 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 589 for ( jj=0; jj<bs; jj++ ) { 590 baij_a[jj] = *value++; 591 } 592 } 593 } 594 } 595 } 596 } else { 597 if (!baij->donotstash) { 598 if (roworiented) { 599 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 600 } else { 601 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 602 } 603 } 604 } 605 } 606 #if defined(PETSC_USE_BOPT_g) 607 baij->ht_total_ct = total_ct; 608 baij->ht_insert_ct = insert_ct; 609 #endif 610 PetscFunctionReturn(0); 611 } 612 613 #undef __FUNC__ 614 #define __FUNC__ "MatGetValues_MPIBAIJ" 615 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 616 { 617 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 618 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 619 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 620 621 PetscFunctionBegin; 622 for ( i=0; i<m; i++ ) { 623 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 624 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for ( j=0; j<n; j++ ) { 628 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 629 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 630 if (idxn[j] >= bscstart && idxn[j] < bscend){ 631 col = idxn[j] - bscstart; 632 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } else { 634 if (!baij->colmap) { 635 ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 636 } 637 #if defined (PETSC_USE_CTABLE) 638 ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 639 data --; 640 #else 641 data = baij->colmap[idxn[j]/bs]-1; 642 #endif 643 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 644 else { 645 col = data + idxn[j]%bs; 646 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } 648 } 649 } 650 } else { 651 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 652 } 653 } 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNC__ 658 #define __FUNC__ "MatNorm_MPIBAIJ" 659 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 660 { 661 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 662 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 663 int ierr, i,bs2=baij->bs2; 664 double sum = 0.0; 665 Scalar *v; 666 667 PetscFunctionBegin; 668 if (baij->size == 1) { 669 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 670 } else { 671 if (type == NORM_FROBENIUS) { 672 v = amat->a; 673 for (i=0; i<amat->nz*bs2; i++ ) { 674 #if defined(PETSC_USE_COMPLEX) 675 sum += PetscReal(PetscConj(*v)*(*v)); v++; 676 #else 677 sum += (*v)*(*v); v++; 678 #endif 679 } 680 v = bmat->a; 681 for (i=0; i<bmat->nz*bs2; i++ ) { 682 #if defined(PETSC_USE_COMPLEX) 683 sum += PetscReal(PetscConj(*v)*(*v)); v++; 684 #else 685 sum += (*v)*(*v); v++; 686 #endif 687 } 688 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 689 *norm = sqrt(*norm); 690 } else { 691 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 692 } 693 } 694 PetscFunctionReturn(0); 695 } 696 697 698 /* 699 Creates the hash table, and sets the table 700 This table is created only once. 701 If new entried need to be added to the matrix 702 then the hash table has to be destroyed and 703 recreated. 
704 */ 705 #undef __FUNC__ 706 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 707 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 708 { 709 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 710 Mat A = baij->A, B=baij->B; 711 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 712 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 713 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 714 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 715 int *HT,key; 716 Scalar **HD; 717 double tmp; 718 #if defined(PETSC_USE_BOPT_g) 719 int ct=0,max=0; 720 #endif 721 722 PetscFunctionBegin; 723 baij->ht_size=(int)(factor*nz); 724 size = baij->ht_size; 725 726 if (baij->ht) { 727 PetscFunctionReturn(0); 728 } 729 730 /* Allocate Memory for Hash Table */ 731 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 732 baij->ht = (int*)(baij->hd + size); 733 HD = baij->hd; 734 HT = baij->ht; 735 736 737 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 738 739 740 /* Loop Over A */ 741 for ( i=0; i<a->mbs; i++ ) { 742 for ( j=ai[i]; j<ai[i+1]; j++ ) { 743 row = i+rstart; 744 col = aj[j]+cstart; 745 746 key = row*Nbs + col + 1; 747 h1 = HASH(size,key,tmp); 748 for ( k=0; k<size; k++ ){ 749 if (HT[(h1+k)%size] == 0.0) { 750 HT[(h1+k)%size] = key; 751 HD[(h1+k)%size] = a->a + j*bs2; 752 break; 753 #if defined(PETSC_USE_BOPT_g) 754 } else { 755 ct++; 756 #endif 757 } 758 } 759 #if defined(PETSC_USE_BOPT_g) 760 if (k> max) max = k; 761 #endif 762 } 763 } 764 /* Loop Over B */ 765 for ( i=0; i<b->mbs; i++ ) { 766 for ( j=bi[i]; j<bi[i+1]; j++ ) { 767 row = i+rstart; 768 col = garray[bj[j]]; 769 key = row*Nbs + col + 1; 770 h1 = HASH(size,key,tmp); 771 for ( k=0; k<size; k++ ){ 772 if (HT[(h1+k)%size] == 0.0) { 773 HT[(h1+k)%size] = key; 774 HD[(h1+k)%size] = b->a + j*bs2; 775 break; 776 #if defined(PETSC_USE_BOPT_g) 777 } else { 778 ct++; 779 #endif 780 } 781 } 782 
#if defined(PETSC_USE_BOPT_g) 783 if (k> max) max = k; 784 #endif 785 } 786 } 787 788 /* Print Summary */ 789 #if defined(PETSC_USE_BOPT_g) 790 for ( i=0,j=0; i<size; i++) 791 if (HT[i]) {j++;} 792 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 793 (j== 0)? 0.0:((double)(ct+j))/j,max); 794 #endif 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 800 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 801 { 802 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 803 int ierr,nstash,reallocs; 804 InsertMode addv; 805 806 PetscFunctionBegin; 807 if (baij->donotstash) { 808 PetscFunctionReturn(0); 809 } 810 811 /* make sure all processors are either in INSERTMODE or ADDMODE */ 812 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 813 if (addv == (ADD_VALUES|INSERT_VALUES)) { 814 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 815 } 816 mat->insertmode = addv; /* in case this processor had no cache */ 817 818 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 819 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 822 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 823 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNC__ 828 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 829 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 830 { 831 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 832 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 833 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 834 int 
*row,*col,other_disassembled,r1,r2,r3; 835 Scalar *val; 836 InsertMode addv = mat->insertmode; 837 838 PetscFunctionBegin; 839 if (!baij->donotstash) { 840 while (1) { 841 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 842 if (!flg) break; 843 844 for ( i=0; i<n; ) { 845 /* Now identify the consecutive vals belonging to the same row */ 846 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 847 if (j < n) ncols = j-i; 848 else ncols = n-i; 849 /* Now assemble all these values with a single function call */ 850 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 851 i = j; 852 } 853 } 854 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 855 /* Now process the block-stash. Since the values are stashed column-oriented, 856 set the roworiented flag to column oriented, and after MatSetValues() 857 restore the original flags */ 858 r1 = baij->roworiented; 859 r2 = a->roworiented; 860 r3 = b->roworiented; 861 baij->roworiented = 0; 862 a->roworiented = 0; 863 b->roworiented = 0; 864 while (1) { 865 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 866 if (!flg) break; 867 868 for ( i=0; i<n; ) { 869 /* Now identify the consecutive vals belonging to the same row */ 870 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 871 if (j < n) ncols = j-i; 872 else ncols = n-i; 873 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 874 i = j; 875 } 876 } 877 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 878 baij->roworiented = r1; 879 a->roworiented = r2; 880 b->roworiented = r3; 881 } 882 883 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 884 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 885 886 /* determine if any processor has disassembled, if so we must 887 also disassemble ourselfs, in order that we may reassemble. 
*/ 888 /* 889 if nonzero structure of submatrix B cannot change then we know that 890 no processor disassembled thus we can skip this stuff 891 */ 892 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 893 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 894 if (mat->was_assembled && !other_disassembled) { 895 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 896 } 897 } 898 899 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 900 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 901 } 902 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 903 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 904 905 #if defined(PETSC_USE_BOPT_g) 906 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 907 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 908 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 909 baij->ht_total_ct = 0; 910 baij->ht_insert_ct = 0; 911 } 912 #endif 913 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 914 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 915 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 916 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 917 } 918 919 if (baij->rowvalues) { 920 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 921 baij->rowvalues = 0; 922 } 923 PetscFunctionReturn(0); 924 } 925 926 /* 927 #undef __FUNC__ 928 #define __FUNC__ "MatView_MPIBAIJ_Binary" 929 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 930 { 931 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 932 int ierr; 933 934 PetscFunctionBegin; 935 if (baij->size == 1) { 936 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 937 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 938 PetscFunctionReturn(0); 939 } 940 */ 941 942 #undef __FUNC__ 943 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 944 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 945 { 946 Mat_MPIBAIJ *baij 
= (Mat_MPIBAIJ *) mat->data; 947 int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 948 FILE *fd; 949 ViewerType vtype; 950 951 PetscFunctionBegin; 952 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 953 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 954 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 955 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 956 MatInfo info; 957 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 958 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 959 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 960 ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr); 961 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 962 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 963 baij->bs,(int)info.memory); 964 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 965 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 966 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 967 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 968 fflush(fd); 969 ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr); 970 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 973 ierr = PetscPrintf(mat->comm," block size is %d\n",bs);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 } 977 978 if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 979 Draw draw; 980 PetscTruth isnull; 981 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 982 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 983 } 984 985 if (size == 1) { 986 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 987 } else { 988 /* assemble the entire matrix onto first processor. 
*/ 989 Mat A; 990 Mat_SeqBAIJ *Aloc; 991 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 992 int mbs = baij->mbs; 993 Scalar *a; 994 995 if (!rank) { 996 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 997 } else { 998 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 999 } 1000 PLogObjectParent(mat,A); 1001 1002 /* copy over the A part */ 1003 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1004 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1005 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1006 1007 for ( i=0; i<mbs; i++ ) { 1008 rvals[0] = bs*(baij->rstart + i); 1009 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1010 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1011 col = (baij->cstart+aj[j])*bs; 1012 for (k=0; k<bs; k++ ) { 1013 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1014 col++; a += bs; 1015 } 1016 } 1017 } 1018 /* copy over the B part */ 1019 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1020 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1021 for ( i=0; i<mbs; i++ ) { 1022 rvals[0] = bs*(baij->rstart + i); 1023 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1024 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1025 col = baij->garray[aj[j]]*bs; 1026 for (k=0; k<bs; k++ ) { 1027 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1028 col++; a += bs; 1029 } 1030 } 1031 } 1032 ierr = PetscFree(rvals);CHKERRQ(ierr); 1033 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1034 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1035 /* 1036 Everyone has to call to draw the matrix since the graphics waits are 1037 synchronized across all processors that share the Draw object 1038 */ 1039 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1040 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 1041 } 1042 ierr = MatDestroy(A);CHKERRQ(ierr); 1043 } 1044 PetscFunctionReturn(0); 1045 } 1046 1047 1048 1049 #undef 
__FUNC__ 1050 #define __FUNC__ "MatView_MPIBAIJ" 1051 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1052 { 1053 int ierr; 1054 ViewerType vtype; 1055 1056 PetscFunctionBegin; 1057 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 1058 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 1059 PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) { 1060 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1061 /* 1062 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 1063 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1064 */ 1065 } else { 1066 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNC__ 1072 #define __FUNC__ "MatDestroy_MPIBAIJ" 1073 int MatDestroy_MPIBAIJ(Mat mat) 1074 { 1075 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1076 int ierr; 1077 1078 PetscFunctionBegin; 1079 1080 if (mat->mapping) { 1081 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1082 } 1083 if (mat->bmapping) { 1084 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1085 } 1086 if (mat->rmap) { 1087 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1088 } 1089 if (mat->cmap) { 1090 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1091 } 1092 #if defined(PETSC_USE_LOG) 1093 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1094 #endif 1095 1096 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1097 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1098 1099 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1100 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1101 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1102 #if defined (PETSC_USE_CTABLE) 1103 if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);} 1104 #else 1105 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1106 #endif 1107 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1108 if 
(baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1109 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1110 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1111 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1112 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1113 ierr = PetscFree(baij);CHKERRQ(ierr); 1114 PLogObjectDestroy(mat); 1115 PetscHeaderDestroy(mat); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNC__ 1120 #define __FUNC__ "MatMult_MPIBAIJ" 1121 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1122 { 1123 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1124 int ierr, nt; 1125 1126 PetscFunctionBegin; 1127 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1128 if (nt != a->n) { 1129 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1130 } 1131 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1132 if (nt != a->m) { 1133 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1134 } 1135 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1136 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1137 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1138 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1139 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNC__ 1144 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1145 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1146 { 1147 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1148 int ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1152 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1153 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1154 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 
1155 PetscFunctionReturn(0); 1156 } 1157 1158 #undef __FUNC__ 1159 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1160 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1161 { 1162 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1163 int ierr; 1164 1165 PetscFunctionBegin; 1166 /* do nondiagonal part */ 1167 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1170 /* do local part */ 1171 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1172 /* receive remote parts: note this assumes the values are not actually */ 1173 /* inserted in yy until the next line, which is true for my implementation*/ 1174 /* but is not perhaps always true. */ 1175 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNC__ 1180 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1181 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1182 { 1183 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1184 int ierr; 1185 1186 PetscFunctionBegin; 1187 /* do nondiagonal part */ 1188 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1189 /* send it on its way */ 1190 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1191 /* do local part */ 1192 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1193 /* receive remote parts: note this assumes the values are not actually */ 1194 /* inserted in yy until the next line, which is true for my implementation*/ 1195 /* but is not perhaps always true. 
*/ 1196 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 /* 1201 This only works correctly for square matrices where the subblock A->A is the 1202 diagonal block 1203 */ 1204 #undef __FUNC__ 1205 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1206 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1207 { 1208 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1209 int ierr; 1210 1211 PetscFunctionBegin; 1212 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1213 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNC__ 1218 #define __FUNC__ "MatScale_MPIBAIJ" 1219 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1220 { 1221 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1222 int ierr; 1223 1224 PetscFunctionBegin; 1225 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1226 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNC__ 1231 #define __FUNC__ "MatGetSize_MPIBAIJ" 1232 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1233 { 1234 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1235 1236 PetscFunctionBegin; 1237 if (m) *m = mat->M; 1238 if (n) *n = mat->N; 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNC__ 1243 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1244 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1245 { 1246 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1247 1248 PetscFunctionBegin; 1249 *m = mat->m; *n = mat->n; 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNC__ 1254 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1255 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1256 { 1257 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1258 1259 PetscFunctionBegin; 1260 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNC__ 1265 #define __FUNC__ "MatGetRow_MPIBAIJ" 1266 int MatGetRow_MPIBAIJ(Mat 
matin,int row,int *nz,int **idx,Scalar **v)
{
  /* Returns one (global, point-indexed) row of the matrix: global column
     indices and values, merged from the diagonal (A) and off-diagonal (B)
     sequential blocks.  Only rows owned by this process may be requested. */
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be extracted at a time per matrix */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* combined block-row length over both blocks */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single buffer holds the values followed by the indices; freed at
       assembly (see MatAssemblyEnd) or destroy, not per call */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request only the outputs the caller asked for from the sub-blocks */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;   /* maps local block columns of B to global block columns */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be computed here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
/* Releases the row obtained with MatGetRow_MPIBAIJ(); only clears the
   active flag -- the merge buffers are retained for reuse. */
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
/* Returns the block size of the matrix. */
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
/* Zeros all stored entries of both sub-blocks, preserving the nonzero
   structure. */
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a =
(Mat_MPIBAIJ *) matin->data; 1388 Mat A = a->A, B = a->B; 1389 int ierr; 1390 double isend[5], irecv[5]; 1391 1392 PetscFunctionBegin; 1393 info->block_size = (double)a->bs; 1394 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1395 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1396 isend[3] = info->memory; isend[4] = info->mallocs; 1397 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1398 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1399 isend[3] += info->memory; isend[4] += info->mallocs; 1400 if (flag == MAT_LOCAL) { 1401 info->nz_used = isend[0]; 1402 info->nz_allocated = isend[1]; 1403 info->nz_unneeded = isend[2]; 1404 info->memory = isend[3]; 1405 info->mallocs = isend[4]; 1406 } else if (flag == MAT_GLOBAL_MAX) { 1407 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1408 info->nz_used = irecv[0]; 1409 info->nz_allocated = irecv[1]; 1410 info->nz_unneeded = irecv[2]; 1411 info->memory = irecv[3]; 1412 info->mallocs = irecv[4]; 1413 } else if (flag == MAT_GLOBAL_SUM) { 1414 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1415 info->nz_used = irecv[0]; 1416 info->nz_allocated = irecv[1]; 1417 info->nz_unneeded = irecv[2]; 1418 info->memory = irecv[3]; 1419 info->mallocs = irecv[4]; 1420 } else { 1421 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1422 } 1423 info->rows_global = (double)a->M; 1424 info->columns_global = (double)a->N; 1425 info->rows_local = (double)a->m; 1426 info->columns_local = (double)a->N; 1427 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1428 info->fill_ratio_needed = 0; 1429 info->factor_mallocs = 0; 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNC__ 1434 #define __FUNC__ "MatSetOption_MPIBAIJ" 1435 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1436 { 1437 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1438 int ierr; 1439 1440 PetscFunctionBegin; 1441 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1442 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1443 op == MAT_COLUMNS_UNSORTED || 1444 op == MAT_COLUMNS_SORTED || 1445 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1446 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1447 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1448 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1449 } else if (op == MAT_ROW_ORIENTED) { 1450 a->roworiented = 1; 1451 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1452 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1453 } else if (op == MAT_ROWS_SORTED || 1454 op == MAT_ROWS_UNSORTED || 1455 op == MAT_SYMMETRIC || 1456 op == MAT_STRUCTURALLY_SYMMETRIC || 1457 op == MAT_YES_NEW_DIAGONALS || 1458 op == MAT_USE_HASH_TABLE) { 1459 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1460 } else if (op == MAT_COLUMN_ORIENTED) { 1461 a->roworiented = 0; 1462 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1463 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1464 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1465 a->donotstash = 1; 1466 } else if (op == MAT_NO_NEW_DIAGONALS) { 1467 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1468 } else if (op == MAT_USE_HASH_TABLE) { 1469 a->ht_flag = 1; 1470 } else { 1471 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNC__ 1477 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1478 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1479 { 1480 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1481 Mat_SeqBAIJ *Aloc; 1482 Mat B; 1483 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1484 int bs=baij->bs,mbs=baij->mbs; 1485 Scalar *a; 1486 1487 PetscFunctionBegin; 1488 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1489 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1490 CHKERRQ(ierr); 1491 1492 /* copy over the A part */ 1493 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1494 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1495 
rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    /* global (point) row numbers of this block row */
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++ ) {
        /* insert transposed: the source rows become columns of B */
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  } 
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*) baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      /* garray maps local block column of B to global block column */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++ ) { 
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
    ierr = MatDestroy(baij->A);CHKERRQ(ierr);
    ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
#else
    if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
    if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
    if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
    if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(baij);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the 
       A pointers for the bops and ops but copy everything 
       else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/* Computes mat = diag(ll) * mat * diag(rr); either vector may be PETSC_NULL. */
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         a = baij->A, b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    /* left scaling only needs local values of ll, so no communication */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
  } 
  
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroRows_MPIBAIJ"
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag =
A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  /* Zeros the (point-indexed) global rows listed in is, optionally placing
     diag on the diagonal.  Rows owned by other processes are forwarded to
     their owners with a hand-rolled MPI rendezvous, so every process ends
     up zeroing only its own local rows. */
  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* rowners[] is in block rows; scale by bs to compare point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* recompute starts (the bucketing loop above advanced them) */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;   /* first point row owned by this process */

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits); CHKERRQ(ierr);
  
  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;   /* convert to local row numbers */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: fall back to MatSetValues per diagonal */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* Prints the runtime options of the MPIBAIJ format; the option text is
   printed at most once per program run (static flag). */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

PetscFunctionBegin;
  if (!a->rank) {
    /* sequential help text is printed by the first process only */
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* Resets the factored state; only the diagonal block is forwarded the call. */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__  
#define __FUNC__ "MatEqual_MPIBAIJ"
/* Collective equality test: the matrices are equal iff the diagonal AND
   off-diagonal blocks compare equal on every process (MPI_LAND reduction). */
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    /* compare off-diagonal parts only if the diagonal parts matched */
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* dispatch table for the MATMPIBAIJ type; entry ORDER is significant
   (indexed by the MatOps layout) -- zeros mark unsupported operations */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
1823 MatDiagonalScale_MPIBAIJ, 1824 MatNorm_MPIBAIJ, 1825 MatAssemblyBegin_MPIBAIJ, 1826 MatAssemblyEnd_MPIBAIJ, 1827 0, 1828 MatSetOption_MPIBAIJ, 1829 MatZeroEntries_MPIBAIJ, 1830 MatZeroRows_MPIBAIJ, 1831 0, 1832 0, 1833 0, 1834 0, 1835 MatGetSize_MPIBAIJ, 1836 MatGetLocalSize_MPIBAIJ, 1837 MatGetOwnershipRange_MPIBAIJ, 1838 0, 1839 0, 1840 0, 1841 0, 1842 MatDuplicate_MPIBAIJ, 1843 0, 1844 0, 1845 0, 1846 0, 1847 0, 1848 MatGetSubMatrices_MPIBAIJ, 1849 MatIncreaseOverlap_MPIBAIJ, 1850 MatGetValues_MPIBAIJ, 1851 0, 1852 MatPrintHelp_MPIBAIJ, 1853 MatScale_MPIBAIJ, 1854 0, 1855 0, 1856 0, 1857 MatGetBlockSize_MPIBAIJ, 1858 0, 1859 0, 1860 0, 1861 0, 1862 0, 1863 0, 1864 MatSetUnfactored_MPIBAIJ, 1865 0, 1866 MatSetValuesBlocked_MPIBAIJ, 1867 0, 1868 0, 1869 0, 1870 MatGetMaps_Petsc}; 1871 1872 1873 EXTERN_C_BEGIN 1874 #undef __FUNC__ 1875 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 1876 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1877 { 1878 PetscFunctionBegin; 1879 *a = ((Mat_MPIBAIJ *)A->data)->A; 1880 *iscopy = PETSC_FALSE; 1881 PetscFunctionReturn(0); 1882 } 1883 EXTERN_C_END 1884 1885 #undef __FUNC__ 1886 #define __FUNC__ "MatCreateMPIBAIJ" 1887 /*@C 1888 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1889 (block compressed row). For good matrix assembly performance 1890 the user should preallocate the matrix storage by setting the parameters 1891 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1892 performance can be increased by more than a factor of 50. 1893 1894 Collective on MPI_Comm 1895 1896 Input Parameters: 1897 + comm - MPI communicator 1898 . bs - size of blockk 1899 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1900 This value should be the same as the local size used in creating the 1901 y vector for the matrix-vector product y = Ax. 1902 . 
n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1903 This value should be the same as the local size used in creating the 1904 x vector for the matrix-vector product y = Ax. 1905 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1906 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1907 . d_nz - number of block nonzeros per block row in diagonal portion of local 1908 submatrix (same for all local rows) 1909 . d_nnz - array containing the number of block nonzeros in the various block rows 1910 of the in diagonal portion of the local (possibly different for each block 1911 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1912 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1913 submatrix (same for all local rows). 1914 - o_nnz - array containing the number of nonzeros in the various block rows of the 1915 off-diagonal portion of the local submatrix (possibly different for 1916 each block row) or PETSC_NULL. 1917 1918 Output Parameter: 1919 . A - the matrix 1920 1921 Options Database Keys: 1922 . -mat_no_unroll - uses code that does not unroll the loops in the 1923 block calculations (much slower) 1924 . -mat_block_size - size of the blocks to use 1925 . -mat_mpi - use the parallel matrix data structures even on one processor 1926 (defaults to using SeqBAIJ format on one processor) 1927 1928 Notes: 1929 The user MUST specify either the local or global matrix dimensions 1930 (possibly both). 1931 1932 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1933 than it must be used on all processors that share the object for that argument. 
1934 1935 Storage Information: 1936 For a square global matrix we define each processor's diagonal portion 1937 to be its local rows and the corresponding columns (a square submatrix); 1938 each processor's off-diagonal portion encompasses the remainder of the 1939 local matrix (a rectangular submatrix). 1940 1941 The user can specify preallocated storage for the diagonal part of 1942 the local submatrix with either d_nz or d_nnz (not both). Set 1943 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1944 memory allocation. Likewise, specify preallocated storage for the 1945 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1946 1947 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1948 the figure below we depict these three local rows and all columns (0-11). 1949 1950 .vb 1951 0 1 2 3 4 5 6 7 8 9 10 11 1952 ------------------- 1953 row 3 | o o o d d d o o o o o o 1954 row 4 | o o o d d d o o o o o o 1955 row 5 | o o o d d d o o o o o o 1956 ------------------- 1957 .ve 1958 1959 Thus, any entries in the d locations are stored in the d (diagonal) 1960 submatrix, and any entries in the o locations are stored in the 1961 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1962 stored simply in the MATSEQBAIJ format for compressed row storage. 1963 1964 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1965 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1966 In general, for PDE problems in which most nonzeros are near the diagonal, 1967 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1968 or you will get TERRIBLE performance; see the users' manual chapter on 1969 matrices. 
1970 1971 Level: intermediate 1972 1973 .keywords: matrix, block, aij, compressed row, sparse, parallel 1974 1975 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1976 @*/ 1977 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1978 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1979 { 1980 Mat B; 1981 Mat_MPIBAIJ *b; 1982 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1983 int flag1 = 0,flag2 = 0; 1984 1985 PetscFunctionBegin; 1986 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1987 1988 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1989 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 1990 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 1991 if (d_nnz) { 1992 for (i=0; i<m/bs; i++) { 1993 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1994 } 1995 } 1996 if (o_nnz) { 1997 for (i=0; i<m/bs; i++) { 1998 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1999 } 2000 } 2001 2002 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2003 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2004 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2005 if (!flag1 && !flag2 && size == 1) { 2006 if (M == PETSC_DECIDE) M = m; 2007 if (N == PETSC_DECIDE) N = n; 2008 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 *A = 0; 2013 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2014 PLogObjectCreate(B); 2015 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2016 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2017 ierr = 
PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2018 2019 B->ops->destroy = MatDestroy_MPIBAIJ; 2020 B->ops->view = MatView_MPIBAIJ; 2021 B->mapping = 0; 2022 B->factor = 0; 2023 B->assembled = PETSC_FALSE; 2024 2025 B->insertmode = NOT_SET_VALUES; 2026 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2027 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2028 2029 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2030 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2031 } 2032 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2033 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2034 } 2035 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2036 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2037 } 2038 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2039 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2040 2041 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2042 work[0] = m; work[1] = n; 2043 mbs = m/bs; nbs = n/bs; 2044 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2045 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2046 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2047 } 2048 if (m == PETSC_DECIDE) { 2049 Mbs = M/bs; 2050 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2051 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2052 m = mbs*bs; 2053 } 2054 if (n == PETSC_DECIDE) { 2055 Nbs = N/bs; 2056 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2057 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2058 n = nbs*bs; 2059 } 2060 if (mbs*bs != m || nbs*bs != n) { 2061 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2062 } 2063 2064 b->m = m; B->m = m; 2065 b->n = n; B->n = n; 2066 b->N = N; B->N = N; 2067 b->M = M; B->M = M; 2068 
b->bs = bs; 2069 b->bs2 = bs*bs; 2070 b->mbs = mbs; 2071 b->nbs = nbs; 2072 b->Mbs = Mbs; 2073 b->Nbs = Nbs; 2074 2075 /* the information in the maps duplicates the information computed below, eventually 2076 we should remove the duplicate information that is not contained in the maps */ 2077 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2078 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2079 2080 /* build local table of row and column ownerships */ 2081 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2082 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2083 b->cowners = b->rowners + b->size + 2; 2084 b->rowners_bs = b->cowners + b->size + 2; 2085 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2086 b->rowners[0] = 0; 2087 for ( i=2; i<=b->size; i++ ) { 2088 b->rowners[i] += b->rowners[i-1]; 2089 } 2090 for ( i=0; i<=b->size; i++ ) { 2091 b->rowners_bs[i] = b->rowners[i]*bs; 2092 } 2093 b->rstart = b->rowners[b->rank]; 2094 b->rend = b->rowners[b->rank+1]; 2095 b->rstart_bs = b->rstart * bs; 2096 b->rend_bs = b->rend * bs; 2097 2098 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2099 b->cowners[0] = 0; 2100 for ( i=2; i<=b->size; i++ ) { 2101 b->cowners[i] += b->cowners[i-1]; 2102 } 2103 b->cstart = b->cowners[b->rank]; 2104 b->cend = b->cowners[b->rank+1]; 2105 b->cstart_bs = b->cstart * bs; 2106 b->cend_bs = b->cend * bs; 2107 2108 2109 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2110 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2111 PLogObjectParent(B,b->A); 2112 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2113 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2114 PLogObjectParent(B,b->B); 2115 2116 /* build cache for off array entries formed */ 2117 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2118 ierr = 
MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2119 b->donotstash = 0; 2120 b->colmap = 0; 2121 b->garray = 0; 2122 b->roworiented = 1; 2123 2124 /* stuff used in block assembly */ 2125 b->barray = 0; 2126 2127 /* stuff used for matrix vector multiply */ 2128 b->lvec = 0; 2129 b->Mvctx = 0; 2130 2131 /* stuff for MatGetRow() */ 2132 b->rowindices = 0; 2133 b->rowvalues = 0; 2134 b->getrowactive = PETSC_FALSE; 2135 2136 /* hash table stuff */ 2137 b->ht = 0; 2138 b->hd = 0; 2139 b->ht_size = 0; 2140 b->ht_flag = 0; 2141 b->ht_fact = 0; 2142 b->ht_total_ct = 0; 2143 b->ht_insert_ct = 0; 2144 2145 *A = B; 2146 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2147 if (flg) { 2148 double fact = 1.39; 2149 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2150 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr); 2151 if (fact <= 1.0) fact = 1.39; 2152 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2153 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2154 } 2155 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2156 "MatStoreValues_MPIBAIJ", 2157 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2158 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2159 "MatRetrieveValues_MPIBAIJ", 2160 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2161 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2162 "MatGetDiagonalBlock_MPIBAIJ", 2163 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNC__ 2168 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2169 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2170 { 2171 Mat mat; 2172 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2173 int ierr, len=0, flg; 2174 2175 PetscFunctionBegin; 2176 *newmat = 0; 2177 PetscHeaderCreate(mat,_p_Mat,struct 
_MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2178 PLogObjectCreate(mat); 2179 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2180 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2181 mat->ops->destroy = MatDestroy_MPIBAIJ; 2182 mat->ops->view = MatView_MPIBAIJ; 2183 mat->factor = matin->factor; 2184 mat->assembled = PETSC_TRUE; 2185 mat->insertmode = NOT_SET_VALUES; 2186 2187 a->m = mat->m = oldmat->m; 2188 a->n = mat->n = oldmat->n; 2189 a->M = mat->M = oldmat->M; 2190 a->N = mat->N = oldmat->N; 2191 2192 a->bs = oldmat->bs; 2193 a->bs2 = oldmat->bs2; 2194 a->mbs = oldmat->mbs; 2195 a->nbs = oldmat->nbs; 2196 a->Mbs = oldmat->Mbs; 2197 a->Nbs = oldmat->Nbs; 2198 2199 a->rstart = oldmat->rstart; 2200 a->rend = oldmat->rend; 2201 a->cstart = oldmat->cstart; 2202 a->cend = oldmat->cend; 2203 a->size = oldmat->size; 2204 a->rank = oldmat->rank; 2205 a->donotstash = oldmat->donotstash; 2206 a->roworiented = oldmat->roworiented; 2207 a->rowindices = 0; 2208 a->rowvalues = 0; 2209 a->getrowactive = PETSC_FALSE; 2210 a->barray = 0; 2211 a->rstart_bs = oldmat->rstart_bs; 2212 a->rend_bs = oldmat->rend_bs; 2213 a->cstart_bs = oldmat->cstart_bs; 2214 a->cend_bs = oldmat->cend_bs; 2215 2216 /* hash table stuff */ 2217 a->ht = 0; 2218 a->hd = 0; 2219 a->ht_size = 0; 2220 a->ht_flag = oldmat->ht_flag; 2221 a->ht_fact = oldmat->ht_fact; 2222 a->ht_total_ct = 0; 2223 a->ht_insert_ct = 0; 2224 2225 2226 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2227 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2228 a->cowners = a->rowners + a->size + 2; 2229 a->rowners_bs = a->cowners + a->size + 2; 2230 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2231 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2232 ierr = 
MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2233 if (oldmat->colmap) { 2234 #if defined (PETSC_USE_CTABLE) 2235 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2236 #else 2237 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2238 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2239 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2240 #endif 2241 } else a->colmap = 0; 2242 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2243 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2244 PLogObjectMemory(mat,len*sizeof(int)); 2245 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2246 } else a->garray = 0; 2247 2248 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2249 PLogObjectParent(mat,a->lvec); 2250 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2251 2252 PLogObjectParent(mat,a->Mvctx); 2253 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2254 PLogObjectParent(mat,a->A); 2255 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2256 PLogObjectParent(mat,a->B); 2257 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2258 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2259 if (flg) { 2260 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2261 } 2262 *newmat = mat; 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #include "sys.h" 2267 2268 #undef __FUNC__ 2269 #define __FUNC__ "MatLoad_MPIBAIJ" 2270 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2271 { 2272 Mat A; 2273 int i, nz, ierr, j,rstart, rend, fd; 2274 Scalar *vals,*buf; 2275 MPI_Comm comm = ((PetscObject)viewer)->comm; 2276 MPI_Status status; 2277 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2278 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2279 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2280 int 
*dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2281 int dcount,kmax,k,nzcount,tmp; 2282 2283 PetscFunctionBegin; 2284 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2285 2286 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2287 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2288 if (!rank) { 2289 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2290 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2291 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2292 if (header[3] < 0) { 2293 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2294 } 2295 } 2296 2297 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2298 M = header[1]; N = header[2]; 2299 2300 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2301 2302 /* 2303 This code adds extra rows to make sure the number of rows is 2304 divisible by the blocksize 2305 */ 2306 Mbs = M/bs; 2307 extra_rows = bs - M + bs*(Mbs); 2308 if (extra_rows == bs) extra_rows = 0; 2309 else Mbs++; 2310 if (extra_rows &&!rank) { 2311 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2312 } 2313 2314 /* determine ownership of all rows */ 2315 mbs = Mbs/size + ((Mbs % size) > rank); 2316 m = mbs * bs; 2317 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2318 browners = rowners + size + 1; 2319 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2320 rowners[0] = 0; 2321 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2322 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2323 rstart = rowners[rank]; 2324 rend = rowners[rank+1]; 2325 2326 /* distribute row lengths to all processors */ 2327 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2328 if (!rank) { 2329 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths); 
2330 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2331 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2332 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2333 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2334 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2335 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2336 } else { 2337 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2338 } 2339 2340 if (!rank) { 2341 /* calculate the number of nonzeros on each processor */ 2342 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2343 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2344 for ( i=0; i<size; i++ ) { 2345 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2346 procsnz[i] += rowlengths[j]; 2347 } 2348 } 2349 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2350 2351 /* determine max buffer needed and allocate it */ 2352 maxnz = 0; 2353 for ( i=0; i<size; i++ ) { 2354 maxnz = PetscMax(maxnz,procsnz[i]); 2355 } 2356 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2357 2358 /* read in my part of the matrix column indices */ 2359 nz = procsnz[0]; 2360 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2361 mycols = ibuf; 2362 if (size == 1) nz -= extra_rows; 2363 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2364 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2365 2366 /* read in every ones (except the last) and ship off */ 2367 for ( i=1; i<size-1; i++ ) { 2368 nz = procsnz[i]; 2369 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2370 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2371 } 2372 /* read in the stuff for the last proc */ 2373 if ( size != 1 ) { 2374 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2375 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2376 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2377 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2378 } 2379 ierr = PetscFree(cols);CHKERRQ(ierr); 2380 } else { 2381 /* determine buffer space needed for message */ 2382 nz = 0; 2383 for ( i=0; i<m; i++ ) { 2384 nz += locrowlens[i]; 2385 } 2386 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2387 mycols = ibuf; 2388 /* receive message of column indices*/ 2389 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2390 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2391 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2392 } 2393 2394 /* loop over local rows, determining number of off diagonal entries */ 2395 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens); 2396 odlens = dlens + (rend-rstart); 2397 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2398 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2399 masked1 = mask + Mbs; 2400 masked2 = masked1 + Mbs; 2401 rowcount = 0; nzcount = 0; 2402 for ( i=0; i<mbs; i++ ) { 2403 dcount = 0; 2404 odcount = 0; 2405 for ( j=0; j<bs; j++ ) { 2406 kmax = locrowlens[rowcount]; 2407 for ( k=0; k<kmax; k++ ) { 2408 tmp = mycols[nzcount++]/bs; 2409 if (!mask[tmp]) { 2410 mask[tmp] = 1; 2411 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2412 else masked1[dcount++] = tmp; 2413 } 2414 } 2415 rowcount++; 2416 } 2417 2418 dlens[i] = dcount; 2419 odlens[i] = odcount; 2420 2421 /* zero out the mask elements we set */ 2422 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2423 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2424 } 2425 2426 /* create our matrix */ 2427 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2428 A = *newmat; 2429 MatSetOption(A,MAT_COLUMNS_SORTED); 2430 2431 if (!rank) { 
2432 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2433 /* read in my part of the matrix numerical values */ 2434 nz = procsnz[0]; 2435 vals = buf; 2436 mycols = ibuf; 2437 if (size == 1) nz -= extra_rows; 2438 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2439 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2440 2441 /* insert into matrix */ 2442 jj = rstart*bs; 2443 for ( i=0; i<m; i++ ) { 2444 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2445 mycols += locrowlens[i]; 2446 vals += locrowlens[i]; 2447 jj++; 2448 } 2449 /* read in other processors (except the last one) and ship out */ 2450 for ( i=1; i<size-1; i++ ) { 2451 nz = procsnz[i]; 2452 vals = buf; 2453 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2454 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2455 } 2456 /* the last proc */ 2457 if ( size != 1 ){ 2458 nz = procsnz[i] - extra_rows; 2459 vals = buf; 2460 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2461 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2462 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2463 } 2464 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2465 } else { 2466 /* receive numeric values */ 2467 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2468 2469 /* receive message of values*/ 2470 vals = buf; 2471 mycols = ibuf; 2472 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2473 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2474 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2475 2476 /* insert into matrix */ 2477 jj = rstart*bs; 2478 for ( i=0; i<m; i++ ) { 2479 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2480 mycols += locrowlens[i]; 2481 vals += locrowlens[i]; 2482 jj++; 2483 } 2484 } 2485 ierr = 
PetscFree(locrowlens);CHKERRQ(ierr); 2486 ierr = PetscFree(buf);CHKERRQ(ierr); 2487 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2488 ierr = PetscFree(rowners);CHKERRQ(ierr); 2489 ierr = PetscFree(dlens);CHKERRQ(ierr); 2490 ierr = PetscFree(mask);CHKERRQ(ierr); 2491 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2492 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 2497 2498 #undef __FUNC__ 2499 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2500 /*@ 2501 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2502 2503 Input Parameters: 2504 . mat - the matrix 2505 . fact - factor 2506 2507 Collective on Mat 2508 2509 Level: advanced 2510 2511 Notes: 2512 This can also be set by the command line option: -mat_use_hash_table fact 2513 2514 .keywords: matrix, hashtable, factor, HT 2515 2516 .seealso: MatSetOption() 2517 @*/ 2518 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2519 { 2520 Mat_MPIBAIJ *baij; 2521 2522 PetscFunctionBegin; 2523 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2524 if (mat->type != MATMPIBAIJ) { 2525 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2526 } 2527 baij = (Mat_MPIBAIJ*) mat->data; 2528 baij->ht_fact = fact; 2529 PetscFunctionReturn(0); 2530 } 2531