#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpibaij.c,v 1.178 1999/09/21 14:41:06 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "mat.h" I*/
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/*
   Saves the numerical values of the matrix by delegating to MatStoreValues()
   on both the diagonal (A) and off-diagonal (B) sequential blocks.
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
/*
   Restores the numerical values previously saved with MatStoreValues_MPIBAIJ()
   by delegating to MatRetrieveValues() on both local blocks.
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* Hash-table variant: stores garray[i]+1 -> i*bs+1; the +1 offsets keep
     0 free as the "not present" sentinel. */
  ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant: one int per global block column; entry 0 means "column
     not present locally", otherwise value is local offset + 1. */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* Number of extra block entries allocated each time a row overflows. */
#define CHUNKSIZE 10

/*
   Inserts/adds a single scalar at (row,col) into the diagonal block a (matrix A).
   Binary search narrows to the block column; if absent and allowed, the row is
   enlarged by CHUNKSIZE blocks via a fresh single-malloc (i,j,a in one buffer).
   Expects the caller to have the macro's working variables in scope.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap + bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value; \
          else                    *bap  = value; \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                           aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr); \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \
      rp[_i]                      = bcol; \
      ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   Same insertion logic as MatSetValues_SeqBAIJ_A_Private, but operating on the
   off-diagonal block b (matrix B) with the b-prefixed working arrays.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap + bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value; \
          else                    *bap  = value; \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                           ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr); \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \
      rp[_i]                      = bcol; \
      ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   Point-wise (non-blocked) MatSetValues for the parallel BAIJ format.
   Locally-owned entries go into A (diagonal block) or B (off-diagonal block)
   via the insertion macros above; off-processor rows are stashed for exchange
   during assembly.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap stores local offset + 1; 0 means column not present */
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: fall back to disassembled state */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-processor row: stash for communication during assembly */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/*
   Blocked MatSetValues: im[]/in[] are block indices and v holds bs x bs
   blocks.  Each block is copied (if needed) into the contiguous scratch
   buffer baij->barray before being handed to the sequential routines;
   off-processor rows go into the block stash.
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    /* lazily allocated once; reused for every block copy */
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
            /* sanity check: colmap entries must be block-aligned */
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if((data - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Multiplicative hash on the golden ratio; tmp must be a double lvalue. */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table fast path for MatSetValues once the nonzero pattern is frozen:
   looks up the address of each bs x bs block in baij->ht/baij->hd using
   linear probing (wrapping once); errors if the entry does not exist.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0)
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to the front once */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Blocked variant of the hash-table fast path: locates each bs x bs block
   through baij->ht/baij->hd and copies/accumulates the values directly,
   honoring row- vs column-oriented input layout.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   Retrieves entries at point (non-block) indices; only locally-owned rows
   are supported.  Off-diagonal columns are translated through colmap;
   columns not present locally read as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 637 } 638 #if defined (PETSC_USE_CTABLE) 639 ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 640 data --; 641 #else 642 data = baij->colmap[idxn[j]/bs]-1; 643 #endif 644 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 645 else { 646 col = data + idxn[j]%bs; 647 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 648 } 649 } 650 } 651 } else { 652 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 653 } 654 } 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNC__ 659 #define __FUNC__ "MatNorm_MPIBAIJ" 660 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 661 { 662 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 663 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 664 int ierr, i,bs2=baij->bs2; 665 double sum = 0.0; 666 Scalar *v; 667 668 PetscFunctionBegin; 669 if (baij->size == 1) { 670 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 671 } else { 672 if (type == NORM_FROBENIUS) { 673 v = amat->a; 674 for (i=0; i<amat->nz*bs2; i++ ) { 675 #if defined(PETSC_USE_COMPLEX) 676 sum += PetscReal(PetscConj(*v)*(*v)); v++; 677 #else 678 sum += (*v)*(*v); v++; 679 #endif 680 } 681 v = bmat->a; 682 for (i=0; i<bmat->nz*bs2; i++ ) { 683 #if defined(PETSC_USE_COMPLEX) 684 sum += PetscReal(PetscConj(*v)*(*v)); v++; 685 #else 686 sum += (*v)*(*v); v++; 687 #endif 688 } 689 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 690 *norm = sqrt(*norm); 691 } else { 692 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 693 } 694 } 695 PetscFunctionReturn(0); 696 } 697 698 699 /* 700 Creates the hash table, and sets the table 701 This table is created only once. 702 If new entried need to be added to the matrix 703 then the hash table has to be destroyed and 704 recreated. 
705 */ 706 #undef __FUNC__ 707 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 708 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 709 { 710 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 711 Mat A = baij->A, B=baij->B; 712 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 713 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 714 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 715 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 716 int *HT,key; 717 Scalar **HD; 718 double tmp; 719 #if defined(PETSC_USE_BOPT_g) 720 int ct=0,max=0; 721 #endif 722 723 PetscFunctionBegin; 724 baij->ht_size=(int)(factor*nz); 725 size = baij->ht_size; 726 727 if (baij->ht) { 728 PetscFunctionReturn(0); 729 } 730 731 /* Allocate Memory for Hash Table */ 732 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 733 baij->ht = (int*)(baij->hd + size); 734 HD = baij->hd; 735 HT = baij->ht; 736 737 738 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 739 740 741 /* Loop Over A */ 742 for ( i=0; i<a->mbs; i++ ) { 743 for ( j=ai[i]; j<ai[i+1]; j++ ) { 744 row = i+rstart; 745 col = aj[j]+cstart; 746 747 key = row*Nbs + col + 1; 748 h1 = HASH(size,key,tmp); 749 for ( k=0; k<size; k++ ){ 750 if (HT[(h1+k)%size] == 0.0) { 751 HT[(h1+k)%size] = key; 752 HD[(h1+k)%size] = a->a + j*bs2; 753 break; 754 #if defined(PETSC_USE_BOPT_g) 755 } else { 756 ct++; 757 #endif 758 } 759 } 760 #if defined(PETSC_USE_BOPT_g) 761 if (k> max) max = k; 762 #endif 763 } 764 } 765 /* Loop Over B */ 766 for ( i=0; i<b->mbs; i++ ) { 767 for ( j=bi[i]; j<bi[i+1]; j++ ) { 768 row = i+rstart; 769 col = garray[bj[j]]; 770 key = row*Nbs + col + 1; 771 h1 = HASH(size,key,tmp); 772 for ( k=0; k<size; k++ ){ 773 if (HT[(h1+k)%size] == 0.0) { 774 HT[(h1+k)%size] = key; 775 HD[(h1+k)%size] = b->a + j*bs2; 776 break; 777 #if defined(PETSC_USE_BOPT_g) 778 } else { 779 ct++; 780 #endif 781 } 782 } 783 
#if defined(PETSC_USE_BOPT_g) 784 if (k> max) max = k; 785 #endif 786 } 787 } 788 789 /* Print Summary */ 790 #if defined(PETSC_USE_BOPT_g) 791 for ( i=0,j=0; i<size; i++) 792 if (HT[i]) {j++;} 793 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 794 (j== 0)? 0.0:((double)(ct+j))/j,max); 795 #endif 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNC__ 800 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 801 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 804 int ierr,nstash,reallocs; 805 InsertMode addv; 806 807 PetscFunctionBegin; 808 if (baij->donotstash) { 809 PetscFunctionReturn(0); 810 } 811 812 /* make sure all processors are either in INSERTMODE or ADDMODE */ 813 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 814 if (addv == (ADD_VALUES|INSERT_VALUES)) { 815 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 816 } 817 mat->insertmode = addv; /* in case this processor had no cache */ 818 819 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 820 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 821 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 822 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 823 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 824 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNC__ 829 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 830 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 831 { 832 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 833 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 834 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 835 int 
              *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the scalar stash: receive messages and apply runs of values that
       share a row with one MatSetValues call each */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once the pattern is final, switch SetValues to the hash-table fast path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/*
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
*/

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Viewer back-end: prints per-process info for ASCII info formats; for full
   output, gathers the entire matrix onto process 0 (via a temporary MPIBAIJ
   matrix with all rows on rank 0) and views its local part.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE        *fd;

  PetscFunctionBegin;
  if (PetscTypeCompare(viewer,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(viewer,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(viewer,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ"
/* Dispatches MatView to the combined ASCII/Draw/Socket/Binary back-end. */
int MatView_MPIBAIJ(Mat mat,Viewer viewer)
{
  int ierr;

  PetscFunctionBegin;
  if (PetscTypeCompare(viewer,ASCII_VIEWER) || PetscTypeCompare(viewer,DRAW_VIEWER) ||
      PetscTypeCompare(viewer,SOCKET_VIEWER)|| PetscTypeCompare(viewer,BINARY_VIEWER)) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  /*
  } else if (PetscTypeCompare(viewer,BINARY_VIEWER)) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  */
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/* Frees all resources owned by the parallel matrix (continues past this chunk). */
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if
(baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1107 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1108 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1109 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1110 ierr = PetscFree(baij);CHKERRQ(ierr); 1111 PLogObjectDestroy(mat); 1112 PetscHeaderDestroy(mat); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNC__ 1117 #define __FUNC__ "MatMult_MPIBAIJ" 1118 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1119 { 1120 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1121 int ierr, nt; 1122 1123 PetscFunctionBegin; 1124 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1125 if (nt != a->n) { 1126 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1127 } 1128 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1129 if (nt != a->m) { 1130 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1131 } 1132 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1133 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1134 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1135 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1136 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNC__ 1141 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1142 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1143 { 1144 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1145 int ierr; 1146 1147 PetscFunctionBegin; 1148 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1149 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1150 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1151 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNC__ 1156 
#define __FUNC__ "MatMultTrans_MPIBAIJ" 1157 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1158 { 1159 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1160 int ierr; 1161 1162 PetscFunctionBegin; 1163 /* do nondiagonal part */ 1164 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1165 /* send it on its way */ 1166 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1167 /* do local part */ 1168 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1169 /* receive remote parts: note this assumes the values are not actually */ 1170 /* inserted in yy until the next line, which is true for my implementation*/ 1171 /* but is not perhaps always true. */ 1172 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNC__ 1177 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1178 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1179 { 1180 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1181 int ierr; 1182 1183 PetscFunctionBegin; 1184 /* do nondiagonal part */ 1185 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1186 /* send it on its way */ 1187 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1188 /* do local part */ 1189 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1190 /* receive remote parts: note this assumes the values are not actually */ 1191 /* inserted in yy until the next line, which is true for my implementation*/ 1192 /* but is not perhaps always true. 
*/ 1193 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 /* 1198 This only works correctly for square matrices where the subblock A->A is the 1199 diagonal block 1200 */ 1201 #undef __FUNC__ 1202 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1203 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1204 { 1205 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1206 int ierr; 1207 1208 PetscFunctionBegin; 1209 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1210 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNC__ 1215 #define __FUNC__ "MatScale_MPIBAIJ" 1216 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1217 { 1218 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1219 int ierr; 1220 1221 PetscFunctionBegin; 1222 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1223 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNC__ 1228 #define __FUNC__ "MatGetSize_MPIBAIJ" 1229 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1230 { 1231 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1232 1233 PetscFunctionBegin; 1234 if (m) *m = mat->M; 1235 if (n) *n = mat->N; 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNC__ 1240 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1241 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1242 { 1243 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1244 1245 PetscFunctionBegin; 1246 *m = mat->m; *n = mat->n; 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNC__ 1251 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1252 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1253 { 1254 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1255 1256 PetscFunctionBegin; 1257 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNC__ 1262 #define __FUNC__ "MatGetRow_MPIBAIJ" 1263 int MatGetRow_MPIBAIJ(Mat 
matin,int row,int *nz,int **idx,Scalar **v)
{
  /*
     Returns one (local) point row of the parallel matrix by fetching the row from
     the diagonal (A) and off-diagonal (B) sequential submatrices and merging the
     two column lists into globally increasing order.  Work space big enough for
     the longest local row is cached on the matrix in rowvalues/rowindices.
  */
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time; MatRestoreRow clears the flag */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: Scalar values first, then the int index area */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* pass null pointers for the pieces the caller did not ask for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column lies left of cstart */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found while copying values above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
/* Releases the row checked out by MatGetRow_MPIBAIJ (work space stays cached) */
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
/* Zeroes all stored entries (structure is kept) in both submatrices */
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a =
(Mat_MPIBAIJ *) matin->data;
  /*
     Sums the statistics of the diagonal (A) and off-diagonal (B) submatrices,
     then either reports them locally or reduces them (MAX or SUM) over the
     communicator according to flag.
  */
  Mat          A = a->A, B = a->B;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size     = (double)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  /* accumulate B's counts on top of A's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (double)a->M;
  info->columns_global    = (double)a->N;
  info->rows_local        = (double)a->m;
  /* NOTE(review): uses the GLOBAL column count N while rows_local uses the local
     row count m.  Possibly intentional (the locally stored part spans all N
     columns), but inconsistent with MatGetLocalSize which returns a->n — confirm. */
  info->columns_local     = (double)a->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIBAIJ"
/* Applies a MatOption, forwarding to the two sequential submatrices where relevant */
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1439 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1440 op == MAT_COLUMNS_UNSORTED || 1441 op == MAT_COLUMNS_SORTED || 1442 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1443 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1444 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1445 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1446 } else if (op == MAT_ROW_ORIENTED) { 1447 a->roworiented = 1; 1448 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1449 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1450 } else if (op == MAT_ROWS_SORTED || 1451 op == MAT_ROWS_UNSORTED || 1452 op == MAT_SYMMETRIC || 1453 op == MAT_STRUCTURALLY_SYMMETRIC || 1454 op == MAT_YES_NEW_DIAGONALS || 1455 op == MAT_USE_HASH_TABLE) { 1456 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1457 } else if (op == MAT_COLUMN_ORIENTED) { 1458 a->roworiented = 0; 1459 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1460 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1461 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1462 a->donotstash = 1; 1463 } else if (op == MAT_NO_NEW_DIAGONALS) { 1464 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1465 } else if (op == MAT_USE_HASH_TABLE) { 1466 a->ht_flag = 1; 1467 } else { 1468 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNC__ 1474 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1475 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1476 { 1477 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1478 Mat_SeqBAIJ *Aloc; 1479 Mat B; 1480 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1481 int bs=baij->bs,mbs=baij->mbs; 1482 Scalar *a; 1483 1484 PetscFunctionBegin; 1485 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1486 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1487 CHKERRQ(ierr); 1488 1489 /* copy over the A part */ 1490 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1491 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1492 
rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1493 1494 for ( i=0; i<mbs; i++ ) { 1495 rvals[0] = bs*(baij->rstart + i); 1496 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1497 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1498 col = (baij->cstart+aj[j])*bs; 1499 for (k=0; k<bs; k++ ) { 1500 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1501 col++; a += bs; 1502 } 1503 } 1504 } 1505 /* copy over the B part */ 1506 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1507 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1508 for ( i=0; i<mbs; i++ ) { 1509 rvals[0] = bs*(baij->rstart + i); 1510 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1511 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1512 col = baij->garray[aj[j]]*bs; 1513 for (k=0; k<bs; k++ ) { 1514 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1515 col++; a += bs; 1516 } 1517 } 1518 } 1519 ierr = PetscFree(rvals);CHKERRQ(ierr); 1520 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1521 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1522 1523 if (matout != PETSC_NULL) { 1524 *matout = B; 1525 } else { 1526 PetscOps *Abops; 1527 MatOps Aops; 1528 1529 /* This isn't really an in-place transpose .... but free data structures from baij */ 1530 ierr = PetscFree(baij->rowners); CHKERRQ(ierr); 1531 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1532 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1533 #if defined (PETSC_USE_CTABLE) 1534 if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);} 1535 #else 1536 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1537 #endif 1538 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1539 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1540 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1541 ierr = PetscFree(baij);CHKERRQ(ierr); 1542 1543 /* 1544 This is horrible, horrible code. 
       We need to keep the
       A pointers for the bops and ops but copy everything
       else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   mat = diag(ll) * mat * diag(rr).  The right-scaling of the off-diagonal block
   needs the ghost entries of rr, so that scatter is started first and completed
   only after the left scaling and the diagonal-block scaling are done.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         a = baij->A, b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   Zeroes the (global point) rows listed in is, optionally placing *diag on the
   diagonal.  Row indices may refer to rows owned by other processes, so each
   index is first routed to its owner with a hand-rolled Isend/Irecv rendezvous;
   every process then zeroes the rows it received locally.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] holds block-row ownership boundaries; scale by bs for point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:  */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination process (starts[] is consumed here) */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] since the packing loop above advanced it */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first point row owned by this process, used to localize received indices */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits); CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero, then insert diagonal entries one by one */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* Prints the MATMPIBAIJ-specific options once per run (rank 0 only) */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  /* print the parallel-format options only once per program run */
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* Clears the factored state; only the diagonal block carries factorization state here */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/*
   Compares the local A and B submatrices of the two matrices and reduces the
   result with a logical AND so all processes return the same answer.
*/
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MATMPIBAIJ type; 0 entries are unimplemented operations.
   Slot order is fixed by struct _MatOps. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* Hands back the local diagonal submatrix without copying (iscopy = PETSC_FALSE) */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.
n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           of the diagonal portion of the local matrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.  -mat_no_unroll - uses code that does not unroll the loops in the 
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use
.  -mat_mpi - use the parallel matrix data structures even on one processor 
              (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both). 

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1931 1932 Storage Information: 1933 For a square global matrix we define each processor's diagonal portion 1934 to be its local rows and the corresponding columns (a square submatrix); 1935 each processor's off-diagonal portion encompasses the remainder of the 1936 local matrix (a rectangular submatrix). 1937 1938 The user can specify preallocated storage for the diagonal part of 1939 the local submatrix with either d_nz or d_nnz (not both). Set 1940 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1941 memory allocation. Likewise, specify preallocated storage for the 1942 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1943 1944 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1945 the figure below we depict these three local rows and all columns (0-11). 1946 1947 .vb 1948 0 1 2 3 4 5 6 7 8 9 10 11 1949 ------------------- 1950 row 3 | o o o d d d o o o o o o 1951 row 4 | o o o d d d o o o o o o 1952 row 5 | o o o d d d o o o o o o 1953 ------------------- 1954 .ve 1955 1956 Thus, any entries in the d locations are stored in the d (diagonal) 1957 submatrix, and any entries in the o locations are stored in the 1958 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1959 stored simply in the MATSEQBAIJ format for compressed row storage. 1960 1961 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1962 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1963 In general, for PDE problems in which most nonzeros are near the diagonal, 1964 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1965 or you will get TERRIBLE performance; see the users' manual chapter on 1966 matrices. 
1967 1968 Level: intermediate 1969 1970 .keywords: matrix, block, aij, compressed row, sparse, parallel 1971 1972 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1973 @*/ 1974 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1975 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1976 { 1977 Mat B; 1978 Mat_MPIBAIJ *b; 1979 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1980 int flag1 = 0,flag2 = 0; 1981 1982 PetscFunctionBegin; 1983 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1984 1985 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1986 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 1987 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 1988 if (d_nnz) { 1989 for (i=0; i<m/bs; i++) { 1990 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1991 } 1992 } 1993 if (o_nnz) { 1994 for (i=0; i<m/bs; i++) { 1995 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1996 } 1997 } 1998 1999 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2000 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2001 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2002 if (!flag1 && !flag2 && size == 1) { 2003 if (M == PETSC_DECIDE) M = m; 2004 if (N == PETSC_DECIDE) N = n; 2005 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 *A = 0; 2010 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2011 PLogObjectCreate(B); 2012 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2013 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2014 ierr = 
PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2015 2016 B->ops->destroy = MatDestroy_MPIBAIJ; 2017 B->ops->view = MatView_MPIBAIJ; 2018 B->mapping = 0; 2019 B->factor = 0; 2020 B->assembled = PETSC_FALSE; 2021 2022 B->insertmode = NOT_SET_VALUES; 2023 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2024 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2025 2026 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2027 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2028 } 2029 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2030 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2031 } 2032 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2033 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2034 } 2035 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2036 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2037 2038 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2039 work[0] = m; work[1] = n; 2040 mbs = m/bs; nbs = n/bs; 2041 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2042 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2043 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2044 } 2045 if (m == PETSC_DECIDE) { 2046 Mbs = M/bs; 2047 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2048 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2049 m = mbs*bs; 2050 } 2051 if (n == PETSC_DECIDE) { 2052 Nbs = N/bs; 2053 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2054 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2055 n = nbs*bs; 2056 } 2057 if (mbs*bs != m || nbs*bs != n) { 2058 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2059 } 2060 2061 b->m = m; B->m = m; 2062 b->n = n; B->n = n; 2063 b->N = N; B->N = N; 2064 b->M = M; B->M = M; 2065 
b->bs = bs; 2066 b->bs2 = bs*bs; 2067 b->mbs = mbs; 2068 b->nbs = nbs; 2069 b->Mbs = Mbs; 2070 b->Nbs = Nbs; 2071 2072 /* the information in the maps duplicates the information computed below, eventually 2073 we should remove the duplicate information that is not contained in the maps */ 2074 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2075 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2076 2077 /* build local table of row and column ownerships */ 2078 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2079 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2080 b->cowners = b->rowners + b->size + 2; 2081 b->rowners_bs = b->cowners + b->size + 2; 2082 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2083 b->rowners[0] = 0; 2084 for ( i=2; i<=b->size; i++ ) { 2085 b->rowners[i] += b->rowners[i-1]; 2086 } 2087 for ( i=0; i<=b->size; i++ ) { 2088 b->rowners_bs[i] = b->rowners[i]*bs; 2089 } 2090 b->rstart = b->rowners[b->rank]; 2091 b->rend = b->rowners[b->rank+1]; 2092 b->rstart_bs = b->rstart * bs; 2093 b->rend_bs = b->rend * bs; 2094 2095 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2096 b->cowners[0] = 0; 2097 for ( i=2; i<=b->size; i++ ) { 2098 b->cowners[i] += b->cowners[i-1]; 2099 } 2100 b->cstart = b->cowners[b->rank]; 2101 b->cend = b->cowners[b->rank+1]; 2102 b->cstart_bs = b->cstart * bs; 2103 b->cend_bs = b->cend * bs; 2104 2105 2106 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2107 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2108 PLogObjectParent(B,b->A); 2109 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2110 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2111 PLogObjectParent(B,b->B); 2112 2113 /* build cache for off array entries formed */ 2114 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2115 ierr = 
MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2116 b->donotstash = 0; 2117 b->colmap = 0; 2118 b->garray = 0; 2119 b->roworiented = 1; 2120 2121 /* stuff used in block assembly */ 2122 b->barray = 0; 2123 2124 /* stuff used for matrix vector multiply */ 2125 b->lvec = 0; 2126 b->Mvctx = 0; 2127 2128 /* stuff for MatGetRow() */ 2129 b->rowindices = 0; 2130 b->rowvalues = 0; 2131 b->getrowactive = PETSC_FALSE; 2132 2133 /* hash table stuff */ 2134 b->ht = 0; 2135 b->hd = 0; 2136 b->ht_size = 0; 2137 b->ht_flag = 0; 2138 b->ht_fact = 0; 2139 b->ht_total_ct = 0; 2140 b->ht_insert_ct = 0; 2141 2142 *A = B; 2143 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2144 if (flg) { 2145 double fact = 1.39; 2146 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2147 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr); 2148 if (fact <= 1.0) fact = 1.39; 2149 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2150 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2151 } 2152 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2153 "MatStoreValues_MPIBAIJ", 2154 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2155 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2156 "MatRetrieveValues_MPIBAIJ", 2157 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2158 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2159 "MatGetDiagonalBlock_MPIBAIJ", 2160 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2161 PetscFunctionReturn(0); 2162 } 2163 2164 #undef __FUNC__ 2165 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2166 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2167 { 2168 Mat mat; 2169 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2170 int ierr, len=0, flg; 2171 2172 PetscFunctionBegin; 2173 *newmat = 0; 2174 PetscHeaderCreate(mat,_p_Mat,struct 
_MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2175 PLogObjectCreate(mat); 2176 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2177 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2178 mat->ops->destroy = MatDestroy_MPIBAIJ; 2179 mat->ops->view = MatView_MPIBAIJ; 2180 mat->factor = matin->factor; 2181 mat->assembled = PETSC_TRUE; 2182 mat->insertmode = NOT_SET_VALUES; 2183 2184 a->m = mat->m = oldmat->m; 2185 a->n = mat->n = oldmat->n; 2186 a->M = mat->M = oldmat->M; 2187 a->N = mat->N = oldmat->N; 2188 2189 a->bs = oldmat->bs; 2190 a->bs2 = oldmat->bs2; 2191 a->mbs = oldmat->mbs; 2192 a->nbs = oldmat->nbs; 2193 a->Mbs = oldmat->Mbs; 2194 a->Nbs = oldmat->Nbs; 2195 2196 a->rstart = oldmat->rstart; 2197 a->rend = oldmat->rend; 2198 a->cstart = oldmat->cstart; 2199 a->cend = oldmat->cend; 2200 a->size = oldmat->size; 2201 a->rank = oldmat->rank; 2202 a->donotstash = oldmat->donotstash; 2203 a->roworiented = oldmat->roworiented; 2204 a->rowindices = 0; 2205 a->rowvalues = 0; 2206 a->getrowactive = PETSC_FALSE; 2207 a->barray = 0; 2208 a->rstart_bs = oldmat->rstart_bs; 2209 a->rend_bs = oldmat->rend_bs; 2210 a->cstart_bs = oldmat->cstart_bs; 2211 a->cend_bs = oldmat->cend_bs; 2212 2213 /* hash table stuff */ 2214 a->ht = 0; 2215 a->hd = 0; 2216 a->ht_size = 0; 2217 a->ht_flag = oldmat->ht_flag; 2218 a->ht_fact = oldmat->ht_fact; 2219 a->ht_total_ct = 0; 2220 a->ht_insert_ct = 0; 2221 2222 2223 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2224 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2225 a->cowners = a->rowners + a->size + 2; 2226 a->rowners_bs = a->cowners + a->size + 2; 2227 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2228 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2229 ierr = 
MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2230 if (oldmat->colmap) { 2231 #if defined (PETSC_USE_CTABLE) 2232 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2233 #else 2234 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2235 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2236 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2237 #endif 2238 } else a->colmap = 0; 2239 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2240 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2241 PLogObjectMemory(mat,len*sizeof(int)); 2242 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2243 } else a->garray = 0; 2244 2245 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2246 PLogObjectParent(mat,a->lvec); 2247 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2248 2249 PLogObjectParent(mat,a->Mvctx); 2250 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2251 PLogObjectParent(mat,a->A); 2252 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2253 PLogObjectParent(mat,a->B); 2254 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2255 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2256 if (flg) { 2257 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2258 } 2259 *newmat = mat; 2260 PetscFunctionReturn(0); 2261 } 2262 2263 #include "sys.h" 2264 2265 #undef __FUNC__ 2266 #define __FUNC__ "MatLoad_MPIBAIJ" 2267 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2268 { 2269 Mat A; 2270 int i, nz, ierr, j,rstart, rend, fd; 2271 Scalar *vals,*buf; 2272 MPI_Comm comm = ((PetscObject)viewer)->comm; 2273 MPI_Status status; 2274 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2275 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2276 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2277 int 
*dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2278 int dcount,kmax,k,nzcount,tmp; 2279 2280 PetscFunctionBegin; 2281 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2282 2283 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2284 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2285 if (!rank) { 2286 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2287 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2288 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2289 if (header[3] < 0) { 2290 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2291 } 2292 } 2293 2294 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2295 M = header[1]; N = header[2]; 2296 2297 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2298 2299 /* 2300 This code adds extra rows to make sure the number of rows is 2301 divisible by the blocksize 2302 */ 2303 Mbs = M/bs; 2304 extra_rows = bs - M + bs*(Mbs); 2305 if (extra_rows == bs) extra_rows = 0; 2306 else Mbs++; 2307 if (extra_rows &&!rank) { 2308 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2309 } 2310 2311 /* determine ownership of all rows */ 2312 mbs = Mbs/size + ((Mbs % size) > rank); 2313 m = mbs * bs; 2314 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2315 browners = rowners + size + 1; 2316 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2317 rowners[0] = 0; 2318 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2319 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2320 rstart = rowners[rank]; 2321 rend = rowners[rank+1]; 2322 2323 /* distribute row lengths to all processors */ 2324 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2325 if (!rank) { 2326 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths); 
2327 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2328 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2329 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2330 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2331 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2332 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2333 } else { 2334 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2335 } 2336 2337 if (!rank) { 2338 /* calculate the number of nonzeros on each processor */ 2339 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2340 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2341 for ( i=0; i<size; i++ ) { 2342 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2343 procsnz[i] += rowlengths[j]; 2344 } 2345 } 2346 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2347 2348 /* determine max buffer needed and allocate it */ 2349 maxnz = 0; 2350 for ( i=0; i<size; i++ ) { 2351 maxnz = PetscMax(maxnz,procsnz[i]); 2352 } 2353 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2354 2355 /* read in my part of the matrix column indices */ 2356 nz = procsnz[0]; 2357 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2358 mycols = ibuf; 2359 if (size == 1) nz -= extra_rows; 2360 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2361 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2362 2363 /* read in every ones (except the last) and ship off */ 2364 for ( i=1; i<size-1; i++ ) { 2365 nz = procsnz[i]; 2366 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2367 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2368 } 2369 /* read in the stuff for the last proc */ 2370 if ( size != 1 ) { 2371 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2372 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2373 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2374 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2375 } 2376 ierr = PetscFree(cols);CHKERRQ(ierr); 2377 } else { 2378 /* determine buffer space needed for message */ 2379 nz = 0; 2380 for ( i=0; i<m; i++ ) { 2381 nz += locrowlens[i]; 2382 } 2383 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2384 mycols = ibuf; 2385 /* receive message of column indices*/ 2386 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2387 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2388 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2389 } 2390 2391 /* loop over local rows, determining number of off diagonal entries */ 2392 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens); 2393 odlens = dlens + (rend-rstart); 2394 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2395 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2396 masked1 = mask + Mbs; 2397 masked2 = masked1 + Mbs; 2398 rowcount = 0; nzcount = 0; 2399 for ( i=0; i<mbs; i++ ) { 2400 dcount = 0; 2401 odcount = 0; 2402 for ( j=0; j<bs; j++ ) { 2403 kmax = locrowlens[rowcount]; 2404 for ( k=0; k<kmax; k++ ) { 2405 tmp = mycols[nzcount++]/bs; 2406 if (!mask[tmp]) { 2407 mask[tmp] = 1; 2408 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2409 else masked1[dcount++] = tmp; 2410 } 2411 } 2412 rowcount++; 2413 } 2414 2415 dlens[i] = dcount; 2416 odlens[i] = odcount; 2417 2418 /* zero out the mask elements we set */ 2419 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2420 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2421 } 2422 2423 /* create our matrix */ 2424 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2425 A = *newmat; 2426 MatSetOption(A,MAT_COLUMNS_SORTED); 2427 2428 if (!rank) { 
2429 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2430 /* read in my part of the matrix numerical values */ 2431 nz = procsnz[0]; 2432 vals = buf; 2433 mycols = ibuf; 2434 if (size == 1) nz -= extra_rows; 2435 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2436 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2437 2438 /* insert into matrix */ 2439 jj = rstart*bs; 2440 for ( i=0; i<m; i++ ) { 2441 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2442 mycols += locrowlens[i]; 2443 vals += locrowlens[i]; 2444 jj++; 2445 } 2446 /* read in other processors (except the last one) and ship out */ 2447 for ( i=1; i<size-1; i++ ) { 2448 nz = procsnz[i]; 2449 vals = buf; 2450 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2451 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2452 } 2453 /* the last proc */ 2454 if ( size != 1 ){ 2455 nz = procsnz[i] - extra_rows; 2456 vals = buf; 2457 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2458 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2459 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2460 } 2461 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2462 } else { 2463 /* receive numeric values */ 2464 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2465 2466 /* receive message of values*/ 2467 vals = buf; 2468 mycols = ibuf; 2469 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2470 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2471 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2472 2473 /* insert into matrix */ 2474 jj = rstart*bs; 2475 for ( i=0; i<m; i++ ) { 2476 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2477 mycols += locrowlens[i]; 2478 vals += locrowlens[i]; 2479 jj++; 2480 } 2481 } 2482 ierr = 
PetscFree(locrowlens);CHKERRQ(ierr); 2483 ierr = PetscFree(buf);CHKERRQ(ierr); 2484 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2485 ierr = PetscFree(rowners);CHKERRQ(ierr); 2486 ierr = PetscFree(dlens);CHKERRQ(ierr); 2487 ierr = PetscFree(mask);CHKERRQ(ierr); 2488 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2490 PetscFunctionReturn(0); 2491 } 2492 2493 #undef __FUNC__ 2494 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2495 /*@ 2496 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2497 2498 Input Parameters: 2499 . mat - the matrix 2500 . fact - factor 2501 2502 Collective on Mat 2503 2504 Level: advanced 2505 2506 Notes: 2507 This can also be set by the command line option: -mat_use_hash_table fact 2508 2509 .keywords: matrix, hashtable, factor, HT 2510 2511 .seealso: MatSetOption() 2512 @*/ 2513 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2514 { 2515 Mat_MPIBAIJ *baij; 2516 2517 PetscFunctionBegin; 2518 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2519 if (mat->type != MATMPIBAIJ) { 2520 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2521 } 2522 baij = (Mat_MPIBAIJ*) mat->data; 2523 baij->ht_fact = fact; 2524 PetscFunctionReturn(0); 2525 } 2526