1 /*$Id: mpibaij.c,v 1.180 1999/10/13 20:37:30 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 extern int MatSetUpMultiply_MPIBAIJ(Mat); 7 extern int DisAssemble_MPIBAIJ(Mat); 8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15 extern int MatPrintHelp_SeqBAIJ(Mat); 16 17 EXTERN_C_BEGIN 18 #undef __FUNC__ 19 #define __FUNC__ "MatStoreValues_MPIBAIJ" 20 int MatStoreValues_MPIBAIJ(Mat mat) 21 { 22 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 23 int ierr; 24 25 PetscFunctionBegin; 26 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 27 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 28 PetscFunctionReturn(0); 29 } 30 EXTERN_C_END 31 32 EXTERN_C_BEGIN 33 #undef __FUNC__ 34 #define __FUNC__ "MatRetrieveValues_MPIBAIJ" 35 int MatRetrieveValues_MPIBAIJ(Mat mat) 36 { 37 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 38 int ierr; 39 40 PetscFunctionBegin; 41 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 42 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 43 PetscFunctionReturn(0); 44 } 45 EXTERN_C_END 46 47 /* 48 Local utility routine that creates a mapping from the global column 49 number to the local number in the off-diagonal part of the local 50 storage of the matrix. This is done in a non scable way since the 51 length of colmap equals the global matrix length. 
52 */ 53 #undef __FUNC__ 54 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 55 static int CreateColmap_MPIBAIJ_Private(Mat mat) 56 { 57 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 58 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 59 int nbs = B->nbs,i,bs=B->bs,ierr; 60 61 PetscFunctionBegin; 62 #if defined (PETSC_USE_CTABLE) 63 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 64 for ( i=0; i<nbs; i++ ){ 65 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 66 } 67 #else 68 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 69 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 70 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 71 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 72 #endif 73 PetscFunctionReturn(0); 74 } 75 76 #define CHUNKSIZE 10 77 78 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 79 { \ 80 \ 81 brow = row/bs; \ 82 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 83 rmax = aimax[brow]; nrow = ailen[brow]; \ 84 bcol = col/bs; \ 85 ridx = row % bs; cidx = col % bs; \ 86 low = 0; high = nrow; \ 87 while (high-low > 3) { \ 88 t = (low+high)/2; \ 89 if (rp[t] > bcol) high = t; \ 90 else low = t; \ 91 } \ 92 for ( _i=low; _i<high; _i++ ) { \ 93 if (rp[_i] > bcol) break; \ 94 if (rp[_i] == bcol) { \ 95 bap = ap + bs2*_i + bs*cidx + ridx; \ 96 if (addv == ADD_VALUES) *bap += value; \ 97 else *bap = value; \ 98 goto a_noinsert; \ 99 } \ 100 } \ 101 if (a->nonew == 1) goto a_noinsert; \ 102 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 103 if (nrow >= rmax) { \ 104 /* there is no extra room in row, therefore enlarge */ \ 105 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 106 Scalar *new_a; \ 107 \ 108 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 109 \ 110 /* malloc new storage space */ \ 111 len = 
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 112 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 113 new_j = (int *) (new_a + bs2*new_nz); \ 114 new_i = new_j + new_nz; \ 115 \ 116 /* copy over old data into new slots */ \ 117 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 118 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 119 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 120 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 121 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 122 len*sizeof(int));CHKERRQ(ierr); \ 123 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 124 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 125 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 126 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 127 /* free up old matrix storage */ \ 128 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 129 if (!a->singlemalloc) { \ 130 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 131 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 132 } \ 133 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 134 a->singlemalloc = 1; \ 135 \ 136 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 137 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 138 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 139 a->maxnz += bs2*CHUNKSIZE; \ 140 a->reallocs++; \ 141 a->nz++; \ 142 } \ 143 N = nrow++ - 1; \ 144 /* shift up all the later entries in this row */ \ 145 for ( ii=N; ii>=_i; ii-- ) { \ 146 rp[ii+1] = rp[ii]; \ 147 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 148 } \ 149 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \ 150 rp[_i] = bcol; \ 151 ap[bs2*_i + bs*cidx + ridx] = value; \ 152 a_noinsert:; \ 153 ailen[brow] = nrow; \ 154 } 155 156 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 157 { 
\ 158 \ 159 brow = row/bs; \ 160 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 161 rmax = bimax[brow]; nrow = bilen[brow]; \ 162 bcol = col/bs; \ 163 ridx = row % bs; cidx = col % bs; \ 164 low = 0; high = nrow; \ 165 while (high-low > 3) { \ 166 t = (low+high)/2; \ 167 if (rp[t] > bcol) high = t; \ 168 else low = t; \ 169 } \ 170 for ( _i=low; _i<high; _i++ ) { \ 171 if (rp[_i] > bcol) break; \ 172 if (rp[_i] == bcol) { \ 173 bap = ap + bs2*_i + bs*cidx + ridx; \ 174 if (addv == ADD_VALUES) *bap += value; \ 175 else *bap = value; \ 176 goto b_noinsert; \ 177 } \ 178 } \ 179 if (b->nonew == 1) goto b_noinsert; \ 180 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 181 if (nrow >= rmax) { \ 182 /* there is no extra room in row, therefore enlarge */ \ 183 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 184 Scalar *new_a; \ 185 \ 186 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 187 \ 188 /* malloc new storage space */ \ 189 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 190 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 191 new_j = (int *) (new_a + bs2*new_nz); \ 192 new_i = new_j + new_nz; \ 193 \ 194 /* copy over old data into new slots */ \ 195 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 196 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 197 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 198 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 199 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 200 len*sizeof(int));CHKERRQ(ierr); \ 201 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 202 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 203 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 204 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 205 
/* free up old matrix storage */ \ 206 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 207 if (!b->singlemalloc) { \ 208 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 209 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 210 } \ 211 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 212 b->singlemalloc = 1; \ 213 \ 214 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 215 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 216 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 217 b->maxnz += bs2*CHUNKSIZE; \ 218 b->reallocs++; \ 219 b->nz++; \ 220 } \ 221 N = nrow++ - 1; \ 222 /* shift up all the later entries in this row */ \ 223 for ( ii=N; ii>=_i; ii-- ) { \ 224 rp[ii+1] = rp[ii]; \ 225 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 226 } \ 227 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \ 228 rp[_i] = bcol; \ 229 ap[bs2*_i + bs*cidx + ridx] = value; \ 230 b_noinsert:; \ 231 bilen[brow] = nrow; \ 232 } 233 234 #undef __FUNC__ 235 #define __FUNC__ "MatSetValues_MPIBAIJ" 236 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 237 { 238 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 239 Scalar value; 240 int ierr,i,j,row,col; 241 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 242 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 243 int cend_orig=baij->cend_bs,bs=baij->bs; 244 245 /* Some Variables required in the macro */ 246 Mat A = baij->A; 247 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 248 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 249 Scalar *aa=a->a; 250 251 Mat B = baij->B; 252 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 253 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 254 Scalar *ba=b->a; 255 256 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 257 int low,high,t,ridx,cidx,bs2=a->bs2; 258 Scalar *ap,*bap; 259 260 PetscFunctionBegin; 261 for ( i=0; i<m; i++ ) { 262 if (im[i] < 0) continue; 263 #if defined(PETSC_USE_BOPT_g) 264 
if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 265 #endif 266 if (im[i] >= rstart_orig && im[i] < rend_orig) { 267 row = im[i] - rstart_orig; 268 for ( j=0; j<n; j++ ) { 269 if (in[j] >= cstart_orig && in[j] < cend_orig){ 270 col = in[j] - cstart_orig; 271 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 272 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 273 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 274 } else if (in[j] < 0) continue; 275 #if defined(PETSC_USE_BOPT_g) 276 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 277 #endif 278 else { 279 if (mat->was_assembled) { 280 if (!baij->colmap) { 281 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 282 } 283 #if defined (PETSC_USE_CTABLE) 284 ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr); 285 col = col - 1 + in[j]%bs; 286 #else 287 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 290 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *) (B)->data; 295 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 296 ba=b->a; 297 } 298 } else col = in[j]; 299 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 300 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 301 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 302 } 303 } 304 } else { 305 if ( !baij->donotstash) { 306 if (roworiented) { 307 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 308 } else { 309 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 310 } 311 } 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNC__ 318 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 319 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int 
*im,int n,int *in,Scalar *v,InsertMode addv) 320 { 321 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 322 Scalar *value,*barray=baij->barray; 323 int ierr,i,j,ii,jj,row,col; 324 int roworiented = baij->roworiented,rstart=baij->rstart ; 325 int rend=baij->rend,cstart=baij->cstart,stepval; 326 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 327 328 PetscFunctionBegin; 329 if(!barray) { 330 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray); 331 } 332 333 if (roworiented) { 334 stepval = (n-1)*bs; 335 } else { 336 stepval = (m-1)*bs; 337 } 338 for ( i=0; i<m; i++ ) { 339 if (im[i] < 0) continue; 340 #if defined(PETSC_USE_BOPT_g) 341 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 342 #endif 343 if (im[i] >= rstart && im[i] < rend) { 344 row = im[i] - rstart; 345 for ( j=0; j<n; j++ ) { 346 /* If NumCol = 1 then a copy is not required */ 347 if ((roworiented) && (n == 1)) { 348 barray = v + i*bs2; 349 } else if((!roworiented) && (m == 1)) { 350 barray = v + j*bs2; 351 } else { /* Here a copy is required */ 352 if (roworiented) { 353 value = v + i*(stepval+bs)*bs + j*bs; 354 } else { 355 value = v + j*(stepval+bs)*bs + i*bs; 356 } 357 for ( ii=0; ii<bs; ii++,value+=stepval ) { 358 for (jj=0; jj<bs; jj++ ) { 359 *barray++ = *value++; 360 } 361 } 362 barray -=bs2; 363 } 364 365 if (in[j] >= cstart && in[j] < cend){ 366 col = in[j] - cstart; 367 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 368 } 369 else if (in[j] < 0) continue; 370 #if defined(PETSC_USE_BOPT_g) 371 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 372 #endif 373 else { 374 if (mat->was_assembled) { 375 if (!baij->colmap) { 376 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 377 } 378 379 #if defined(PETSC_USE_BOPT_g) 380 #if defined (PETSC_USE_CTABLE) 381 { int data; 382 ierr = 
PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 383 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 384 } 385 #else 386 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 387 #endif 388 #endif 389 #if defined (PETSC_USE_CTABLE) 390 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 391 col = (col - 1)/bs; 392 #else 393 col = (baij->colmap[in[j]] - 1)/bs; 394 #endif 395 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 396 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 397 col = in[j]; 398 } 399 } 400 else col = in[j]; 401 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 402 } 403 } 404 } else { 405 if (!baij->donotstash) { 406 if (roworiented) { 407 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 408 } else { 409 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 410 } 411 } 412 } 413 } 414 PetscFunctionReturn(0); 415 } 416 #define HASH_KEY 0.6180339887 417 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 418 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 419 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 420 #undef __FUNC__ 421 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 422 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 423 { 424 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 425 int ierr,i,j,row,col; 426 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 427 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 428 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 429 double tmp; 430 Scalar ** HD = baij->hd,value; 431 #if defined(PETSC_USE_BOPT_g) 432 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 433 #endif 434 435 PetscFunctionBegin; 436 437 for ( i=0; i<m; i++ ) { 438 #if defined(PETSC_USE_BOPT_g) 439 if (im[i] < 0) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 440 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 441 #endif 442 row = im[i]; 443 if (row >= rstart_orig && row < rend_orig) { 444 for ( j=0; j<n; j++ ) { 445 col = in[j]; 446 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 447 /* Look up into the Hash Table */ 448 key = (row/bs)*Nbs+(col/bs)+1; 449 h1 = HASH(size,key,tmp); 450 451 452 idx = h1; 453 #if defined(PETSC_USE_BOPT_g) 454 insert_ct++; 455 total_ct++; 456 if (HT[idx] != key) { 457 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 458 if (idx == size) { 459 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 460 if (idx == h1) { 461 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 462 } 463 } 464 } 465 #else 466 if (HT[idx] != key) { 467 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 468 if (idx == size) { 469 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 470 if (idx == h1) { 471 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 472 } 473 } 474 } 475 #endif 476 /* A HASH table entry is found, so insert the values at the correct address */ 477 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 478 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 479 } 480 } else { 481 if (!baij->donotstash) { 482 if (roworiented) { 483 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 484 } else { 485 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 486 } 487 } 488 } 489 } 490 #if defined(PETSC_USE_BOPT_g) 491 baij->ht_total_ct = total_ct; 492 baij->ht_insert_ct = insert_ct; 493 #endif 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNC__ 498 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 499 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 500 { 501 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 502 int 
ierr,i,j,ii,jj,row,col; 503 int roworiented = baij->roworiented,rstart=baij->rstart ; 504 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 505 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 506 double tmp; 507 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 508 #if defined(PETSC_USE_BOPT_g) 509 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 510 #endif 511 512 PetscFunctionBegin; 513 514 if (roworiented) { 515 stepval = (n-1)*bs; 516 } else { 517 stepval = (m-1)*bs; 518 } 519 for ( i=0; i<m; i++ ) { 520 #if defined(PETSC_USE_BOPT_g) 521 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 522 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 523 #endif 524 row = im[i]; 525 v_t = v + i*bs2; 526 if (row >= rstart && row < rend) { 527 for ( j=0; j<n; j++ ) { 528 col = in[j]; 529 530 /* Look up into the Hash Table */ 531 key = row*Nbs+col+1; 532 h1 = HASH(size,key,tmp); 533 534 idx = h1; 535 #if defined(PETSC_USE_BOPT_g) 536 total_ct++; 537 insert_ct++; 538 if (HT[idx] != key) { 539 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 540 if (idx == size) { 541 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 542 if (idx == h1) { 543 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 544 } 545 } 546 } 547 #else 548 if (HT[idx] != key) { 549 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 550 if (idx == size) { 551 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 552 if (idx == h1) { 553 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 554 } 555 } 556 } 557 #endif 558 baij_a = HD[idx]; 559 if (roworiented) { 560 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 561 /* value = v + (i*(stepval+bs)+j)*bs; */ 562 value = v_t; 563 v_t += bs; 564 if (addv == ADD_VALUES) { 565 for ( ii=0; ii<bs; ii++,value+=stepval) { 566 for ( jj=ii; jj<bs2; jj+=bs ) { 567 baij_a[jj] += *value++; 568 } 569 } 570 } else { 571 for ( ii=0; ii<bs; 
ii++,value+=stepval) { 572 for ( jj=ii; jj<bs2; jj+=bs ) { 573 baij_a[jj] = *value++; 574 } 575 } 576 } 577 } else { 578 value = v + j*(stepval+bs)*bs + i*bs; 579 if (addv == ADD_VALUES) { 580 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 581 for ( jj=0; jj<bs; jj++ ) { 582 baij_a[jj] += *value++; 583 } 584 } 585 } else { 586 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 587 for ( jj=0; jj<bs; jj++ ) { 588 baij_a[jj] = *value++; 589 } 590 } 591 } 592 } 593 } 594 } else { 595 if (!baij->donotstash) { 596 if (roworiented) { 597 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 598 } else { 599 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 600 } 601 } 602 } 603 } 604 #if defined(PETSC_USE_BOPT_g) 605 baij->ht_total_ct = total_ct; 606 baij->ht_insert_ct = insert_ct; 607 #endif 608 PetscFunctionReturn(0); 609 } 610 611 #undef __FUNC__ 612 #define __FUNC__ "MatGetValues_MPIBAIJ" 613 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 614 { 615 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 616 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 617 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 618 619 PetscFunctionBegin; 620 for ( i=0; i<m; i++ ) { 621 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 622 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 623 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 624 row = idxm[i] - bsrstart; 625 for ( j=0; j<n; j++ ) { 626 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 627 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 628 if (idxn[j] >= bscstart && idxn[j] < bscend){ 629 col = idxn[j] - bscstart; 630 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 631 } else { 632 if (!baij->colmap) { 633 ierr = 
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 634 } 635 #if defined (PETSC_USE_CTABLE) 636 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 637 data --; 638 #else 639 data = baij->colmap[idxn[j]/bs]-1; 640 #endif 641 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 642 else { 643 col = data + idxn[j]%bs; 644 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 645 } 646 } 647 } 648 } else { 649 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 650 } 651 } 652 PetscFunctionReturn(0); 653 } 654 655 #undef __FUNC__ 656 #define __FUNC__ "MatNorm_MPIBAIJ" 657 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 658 { 659 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 660 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 661 int ierr, i,bs2=baij->bs2; 662 double sum = 0.0; 663 Scalar *v; 664 665 PetscFunctionBegin; 666 if (baij->size == 1) { 667 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 668 } else { 669 if (type == NORM_FROBENIUS) { 670 v = amat->a; 671 for (i=0; i<amat->nz*bs2; i++ ) { 672 #if defined(PETSC_USE_COMPLEX) 673 sum += PetscReal(PetscConj(*v)*(*v)); v++; 674 #else 675 sum += (*v)*(*v); v++; 676 #endif 677 } 678 v = bmat->a; 679 for (i=0; i<bmat->nz*bs2; i++ ) { 680 #if defined(PETSC_USE_COMPLEX) 681 sum += PetscReal(PetscConj(*v)*(*v)); v++; 682 #else 683 sum += (*v)*(*v); v++; 684 #endif 685 } 686 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 687 *norm = sqrt(*norm); 688 } else { 689 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 690 } 691 } 692 PetscFunctionReturn(0); 693 } 694 695 696 /* 697 Creates the hash table, and sets the table 698 This table is created only once. 699 If new entried need to be added to the matrix 700 then the hash table has to be destroyed and 701 recreated. 
702 */ 703 #undef __FUNC__ 704 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 705 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 706 { 707 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 708 Mat A = baij->A, B=baij->B; 709 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 710 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 711 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 712 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 713 int *HT,key; 714 Scalar **HD; 715 double tmp; 716 #if defined(PETSC_USE_BOPT_g) 717 int ct=0,max=0; 718 #endif 719 720 PetscFunctionBegin; 721 baij->ht_size=(int)(factor*nz); 722 size = baij->ht_size; 723 724 if (baij->ht) { 725 PetscFunctionReturn(0); 726 } 727 728 /* Allocate Memory for Hash Table */ 729 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 730 baij->ht = (int*)(baij->hd + size); 731 HD = baij->hd; 732 HT = baij->ht; 733 734 735 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 736 737 738 /* Loop Over A */ 739 for ( i=0; i<a->mbs; i++ ) { 740 for ( j=ai[i]; j<ai[i+1]; j++ ) { 741 row = i+rstart; 742 col = aj[j]+cstart; 743 744 key = row*Nbs + col + 1; 745 h1 = HASH(size,key,tmp); 746 for ( k=0; k<size; k++ ){ 747 if (HT[(h1+k)%size] == 0.0) { 748 HT[(h1+k)%size] = key; 749 HD[(h1+k)%size] = a->a + j*bs2; 750 break; 751 #if defined(PETSC_USE_BOPT_g) 752 } else { 753 ct++; 754 #endif 755 } 756 } 757 #if defined(PETSC_USE_BOPT_g) 758 if (k> max) max = k; 759 #endif 760 } 761 } 762 /* Loop Over B */ 763 for ( i=0; i<b->mbs; i++ ) { 764 for ( j=bi[i]; j<bi[i+1]; j++ ) { 765 row = i+rstart; 766 col = garray[bj[j]]; 767 key = row*Nbs + col + 1; 768 h1 = HASH(size,key,tmp); 769 for ( k=0; k<size; k++ ){ 770 if (HT[(h1+k)%size] == 0.0) { 771 HT[(h1+k)%size] = key; 772 HD[(h1+k)%size] = b->a + j*bs2; 773 break; 774 #if defined(PETSC_USE_BOPT_g) 775 } else { 776 ct++; 777 #endif 778 } 779 } 780 
#if defined(PETSC_USE_BOPT_g) 781 if (k> max) max = k; 782 #endif 783 } 784 } 785 786 /* Print Summary */ 787 #if defined(PETSC_USE_BOPT_g) 788 for ( i=0,j=0; i<size; i++) 789 if (HT[i]) {j++;} 790 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 791 (j== 0)? 0.0:((double)(ct+j))/j,max); 792 #endif 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNC__ 797 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 798 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 799 { 800 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 801 int ierr,nstash,reallocs; 802 InsertMode addv; 803 804 PetscFunctionBegin; 805 if (baij->donotstash) { 806 PetscFunctionReturn(0); 807 } 808 809 /* make sure all processors are either in INSERTMODE or ADDMODE */ 810 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 811 if (addv == (ADD_VALUES|INSERT_VALUES)) { 812 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 813 } 814 mat->insertmode = addv; /* in case this processor had no cache */ 815 816 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 817 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 818 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 819 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNC__ 826 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 827 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 828 { 829 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 830 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 831 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 832 int 
*row,*col,other_disassembled,r1,r2,r3; 833 Scalar *val; 834 InsertMode addv = mat->insertmode; 835 836 PetscFunctionBegin; 837 if (!baij->donotstash) { 838 while (1) { 839 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 840 if (!flg) break; 841 842 for ( i=0; i<n; ) { 843 /* Now identify the consecutive vals belonging to the same row */ 844 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 845 if (j < n) ncols = j-i; 846 else ncols = n-i; 847 /* Now assemble all these values with a single function call */ 848 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 849 i = j; 850 } 851 } 852 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 853 /* Now process the block-stash. Since the values are stashed column-oriented, 854 set the roworiented flag to column oriented, and after MatSetValues() 855 restore the original flags */ 856 r1 = baij->roworiented; 857 r2 = a->roworiented; 858 r3 = b->roworiented; 859 baij->roworiented = 0; 860 a->roworiented = 0; 861 b->roworiented = 0; 862 while (1) { 863 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 864 if (!flg) break; 865 866 for ( i=0; i<n; ) { 867 /* Now identify the consecutive vals belonging to the same row */ 868 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 869 if (j < n) ncols = j-i; 870 else ncols = n-i; 871 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 872 i = j; 873 } 874 } 875 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 876 baij->roworiented = r1; 877 a->roworiented = r2; 878 b->roworiented = r3; 879 } 880 881 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 882 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 883 884 /* determine if any processor has disassembled, if so we must 885 also disassemble ourselfs, in order that we may reassemble. 
*/
  /*
     If the nonzero structure of the off-diagonal submatrix B cannot change
     (nonew set) then no processor can have disassembled, so the collective
     check / reassembly below can be skipped.
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the scatter used by the matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* report and reset hash-table search statistics (debug builds only) */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once assembled, switch MatSetValues to the hash-table fast path if requested */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/*
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
*/

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Views the parallel matrix.  The ASCII "info" formats print per-process
   statistics only; for all other formats the entire matrix is assembled
   onto process 0 and viewed there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  PetscTruth  isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* nz counts are stored per block; scale by bs for point entries */
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* process 0 gets all rows; every other process contributes a 0 x 0 local part */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part: columns are shifted by this process's block column start */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          /* one column of the bs x bs block at a time; values stored column-wise */
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part: garray maps local off-diagonal block columns to global */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}



#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ"
/* Dispatches viewing to the ASCII/draw/socket/binary handler above. */
int MatView_MPIBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/* Frees everything owned by the parallel matrix: mappings, maps, stashes,
   the two sequential submatrices, and the scatter/ghost-vector machinery. */
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr =
PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1100 #else 1101 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1102 #endif 1103 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1104 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1105 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1106 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1107 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1108 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1109 ierr = PetscFree(baij);CHKERRQ(ierr); 1110 PLogObjectDestroy(mat); 1111 PetscHeaderDestroy(mat); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNC__ 1116 #define __FUNC__ "MatMult_MPIBAIJ" 1117 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1118 { 1119 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1120 int ierr, nt; 1121 1122 PetscFunctionBegin; 1123 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1124 if (nt != a->n) { 1125 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1126 } 1127 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1128 if (nt != a->m) { 1129 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1130 } 1131 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1132 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1133 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1134 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1135 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1136 PetscFunctionReturn(0); 1137 } 1138 1139 #undef __FUNC__ 1140 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1141 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1142 { 1143 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1144 int ierr; 1145 1146 PetscFunctionBegin; 1147 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1148 
ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1149 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1150 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNC__ 1155 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1156 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1157 { 1158 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1159 int ierr; 1160 1161 PetscFunctionBegin; 1162 /* do nondiagonal part */ 1163 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1164 /* send it on its way */ 1165 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* receive remote parts: note this assumes the values are not actually */ 1169 /* inserted in yy until the next line, which is true for my implementation*/ 1170 /* but is not perhaps always true. */ 1171 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNC__ 1176 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1177 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1178 { 1179 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1180 int ierr; 1181 1182 PetscFunctionBegin; 1183 /* do nondiagonal part */ 1184 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1185 /* send it on its way */ 1186 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1187 /* do local part */ 1188 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1189 /* receive remote parts: note this assumes the values are not actually */ 1190 /* inserted in yy until the next line, which is true for my implementation*/ 1191 /* but is not perhaps always true. 
*/
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
/* Scales the matrix by *aa: applied to both the diagonal (A) and
   off-diagonal (B) sequential submatrices. */
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
/* Returns the global dimensions; either output pointer may be null. */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
/* Ownership range in point (not block) indices: block range scaled by bs. */
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Returns one (global, point-indexed) row of the matrix, merging the
   diagonal (A) and off-diagonal (B) parts into a single list sorted by
   global column number.  Only rows owned by this process may be requested.
   The merged copy lives in mat->rowvalues/rowindices until MatRestoreRow.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: Scalar values followed by the int indices */
    mat->rowvalues  = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  /* NOTE(review): no ';' after this SETERRQ -- relies on the macro's own braces */
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column is left of cstart come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* imark not yet computed (v was null): find the split point here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
/* Releases the row obtained with MatGetRow; the work arrays are reused. */
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
/* Zeroes the values of both sequential parts; nonzero structure is kept. */
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a =
(Mat_MPIBAIJ *) matin->data; 1384 Mat A = a->A, B = a->B; 1385 int ierr; 1386 double isend[5], irecv[5]; 1387 1388 PetscFunctionBegin; 1389 info->block_size = (double)a->bs; 1390 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1391 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1392 isend[3] = info->memory; isend[4] = info->mallocs; 1393 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1394 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1395 isend[3] += info->memory; isend[4] += info->mallocs; 1396 if (flag == MAT_LOCAL) { 1397 info->nz_used = isend[0]; 1398 info->nz_allocated = isend[1]; 1399 info->nz_unneeded = isend[2]; 1400 info->memory = isend[3]; 1401 info->mallocs = isend[4]; 1402 } else if (flag == MAT_GLOBAL_MAX) { 1403 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1404 info->nz_used = irecv[0]; 1405 info->nz_allocated = irecv[1]; 1406 info->nz_unneeded = irecv[2]; 1407 info->memory = irecv[3]; 1408 info->mallocs = irecv[4]; 1409 } else if (flag == MAT_GLOBAL_SUM) { 1410 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1411 info->nz_used = irecv[0]; 1412 info->nz_allocated = irecv[1]; 1413 info->nz_unneeded = irecv[2]; 1414 info->memory = irecv[3]; 1415 info->mallocs = irecv[4]; 1416 } else { 1417 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1418 } 1419 info->rows_global = (double)a->M; 1420 info->columns_global = (double)a->N; 1421 info->rows_local = (double)a->m; 1422 info->columns_local = (double)a->N; 1423 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1424 info->fill_ratio_needed = 0; 1425 info->factor_mallocs = 0; 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNC__ 1430 #define __FUNC__ "MatSetOption_MPIBAIJ" 1431 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1432 { 1433 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1434 int ierr; 1435 1436 PetscFunctionBegin; 1437 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1438 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1439 op == MAT_COLUMNS_UNSORTED || 1440 op == MAT_COLUMNS_SORTED || 1441 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1442 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1443 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1444 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1445 } else if (op == MAT_ROW_ORIENTED) { 1446 a->roworiented = 1; 1447 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1448 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1449 } else if (op == MAT_ROWS_SORTED || 1450 op == MAT_ROWS_UNSORTED || 1451 op == MAT_SYMMETRIC || 1452 op == MAT_STRUCTURALLY_SYMMETRIC || 1453 op == MAT_YES_NEW_DIAGONALS || 1454 op == MAT_USE_HASH_TABLE) { 1455 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1456 } else if (op == MAT_COLUMN_ORIENTED) { 1457 a->roworiented = 0; 1458 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1459 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1460 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1461 a->donotstash = 1; 1462 } else if (op == MAT_NO_NEW_DIAGONALS) { 1463 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1464 } else if (op == MAT_USE_HASH_TABLE) { 1465 a->ht_flag = 1; 1466 } else { 1467 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1468 } 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNC__ 1473 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1474 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1475 { 1476 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1477 Mat_SeqBAIJ *Aloc; 1478 Mat B; 1479 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1480 int bs=baij->bs,mbs=baij->mbs; 1481 Scalar *a; 1482 1483 PetscFunctionBegin; 1484 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1485 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1486 CHKERRQ(ierr); 1487 1488 /* copy over the A part */ 1489 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1490 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1491 
  rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    /* rvals holds the bs consecutive point-row indices of block row i */
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++ ) {
        /* insert transposed: one original column becomes one row of B */
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*) baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
    ierr = MatDestroy(baij->A);CHKERRQ(ierr);
    ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
    if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
    if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
    if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
    if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(baij);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   mat = diag(ll)*mat*diag(rr).  The scatter of rr into the local ghost
   vector is overlapped with the left scaling of B and the scaling of the
   diagonal block; then B is right-scaled with the received ghost values.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         a = baij->A, b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   Zeroes the given global (point-indexed) rows, optionally placing *diag
   on the diagonal.  Each requested row is first routed to its owning
   process with point-to-point messages, then zeroed locally.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag =
A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] is the block-row partition; scale by bs to get point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* starts[] was consumed above as a cursor; rebuild it for the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits); CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A  because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    /* rectangular case: set diagonal entries one at a time via MatSetValues */
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* Prints the runtime options understood by this matrix type (once per run). */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  /* parallel-specific options are printed only once per run */
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* Resets the factored state; only the diagonal block can have been factored. */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/* Collective equality test: the local A and B parts are compared and the
   per-process results are AND-reduced over the communicator. */
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* function table installed into every MPIBAIJ matrix; 0 = not supported */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* Returns (by reference, no copy) the local diagonal submatrix; the reuse
   argument is unused since no new matrix is ever created here. */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.
  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor 
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1929 1930 Storage Information: 1931 For a square global matrix we define each processor's diagonal portion 1932 to be its local rows and the corresponding columns (a square submatrix); 1933 each processor's off-diagonal portion encompasses the remainder of the 1934 local matrix (a rectangular submatrix). 1935 1936 The user can specify preallocated storage for the diagonal part of 1937 the local submatrix with either d_nz or d_nnz (not both). Set 1938 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1939 memory allocation. Likewise, specify preallocated storage for the 1940 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1941 1942 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1943 the figure below we depict these three local rows and all columns (0-11). 1944 1945 .vb 1946 0 1 2 3 4 5 6 7 8 9 10 11 1947 ------------------- 1948 row 3 | o o o d d d o o o o o o 1949 row 4 | o o o d d d o o o o o o 1950 row 5 | o o o d d d o o o o o o 1951 ------------------- 1952 .ve 1953 1954 Thus, any entries in the d locations are stored in the d (diagonal) 1955 submatrix, and any entries in the o locations are stored in the 1956 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1957 stored simply in the MATSEQBAIJ format for compressed row storage. 1958 1959 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1960 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1961 In general, for PDE problems in which most nonzeros are near the diagonal, 1962 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1963 or you will get TERRIBLE performance; see the users' manual chapter on 1964 matrices. 
1965 1966 Level: intermediate 1967 1968 .keywords: matrix, block, aij, compressed row, sparse, parallel 1969 1970 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1971 @*/ 1972 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1973 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1974 { 1975 Mat B; 1976 Mat_MPIBAIJ *b; 1977 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1978 int flag1 = 0,flag2 = 0; 1979 1980 PetscFunctionBegin; 1981 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1982 1983 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1984 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 1985 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 1986 if (d_nnz) { 1987 for (i=0; i<m/bs; i++) { 1988 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1989 } 1990 } 1991 if (o_nnz) { 1992 for (i=0; i<m/bs; i++) { 1993 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1994 } 1995 } 1996 1997 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1998 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 1999 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2000 if (!flag1 && !flag2 && size == 1) { 2001 if (M == PETSC_DECIDE) M = m; 2002 if (N == PETSC_DECIDE) N = n; 2003 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2004 PetscFunctionReturn(0); 2005 } 2006 2007 *A = 0; 2008 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2009 PLogObjectCreate(B); 2010 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2011 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2012 ierr = 
PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2013 2014 B->ops->destroy = MatDestroy_MPIBAIJ; 2015 B->ops->view = MatView_MPIBAIJ; 2016 B->mapping = 0; 2017 B->factor = 0; 2018 B->assembled = PETSC_FALSE; 2019 2020 B->insertmode = NOT_SET_VALUES; 2021 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2022 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2023 2024 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2025 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2026 } 2027 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2028 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2029 } 2030 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2031 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2032 } 2033 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2034 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2035 2036 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2037 work[0] = m; work[1] = n; 2038 mbs = m/bs; nbs = n/bs; 2039 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2040 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2041 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2042 } 2043 if (m == PETSC_DECIDE) { 2044 Mbs = M/bs; 2045 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2046 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2047 m = mbs*bs; 2048 } 2049 if (n == PETSC_DECIDE) { 2050 Nbs = N/bs; 2051 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2052 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2053 n = nbs*bs; 2054 } 2055 if (mbs*bs != m || nbs*bs != n) { 2056 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2057 } 2058 2059 b->m = m; B->m = m; 2060 b->n = n; B->n = n; 2061 b->N = N; B->N = N; 2062 b->M = M; B->M = M; 2063 
b->bs = bs; 2064 b->bs2 = bs*bs; 2065 b->mbs = mbs; 2066 b->nbs = nbs; 2067 b->Mbs = Mbs; 2068 b->Nbs = Nbs; 2069 2070 /* the information in the maps duplicates the information computed below, eventually 2071 we should remove the duplicate information that is not contained in the maps */ 2072 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2073 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2074 2075 /* build local table of row and column ownerships */ 2076 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2077 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2078 b->cowners = b->rowners + b->size + 2; 2079 b->rowners_bs = b->cowners + b->size + 2; 2080 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2081 b->rowners[0] = 0; 2082 for ( i=2; i<=b->size; i++ ) { 2083 b->rowners[i] += b->rowners[i-1]; 2084 } 2085 for ( i=0; i<=b->size; i++ ) { 2086 b->rowners_bs[i] = b->rowners[i]*bs; 2087 } 2088 b->rstart = b->rowners[b->rank]; 2089 b->rend = b->rowners[b->rank+1]; 2090 b->rstart_bs = b->rstart * bs; 2091 b->rend_bs = b->rend * bs; 2092 2093 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2094 b->cowners[0] = 0; 2095 for ( i=2; i<=b->size; i++ ) { 2096 b->cowners[i] += b->cowners[i-1]; 2097 } 2098 b->cstart = b->cowners[b->rank]; 2099 b->cend = b->cowners[b->rank+1]; 2100 b->cstart_bs = b->cstart * bs; 2101 b->cend_bs = b->cend * bs; 2102 2103 2104 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2105 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2106 PLogObjectParent(B,b->A); 2107 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2108 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2109 PLogObjectParent(B,b->B); 2110 2111 /* build cache for off array entries formed */ 2112 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2113 ierr = 
MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2114 b->donotstash = 0; 2115 b->colmap = 0; 2116 b->garray = 0; 2117 b->roworiented = 1; 2118 2119 /* stuff used in block assembly */ 2120 b->barray = 0; 2121 2122 /* stuff used for matrix vector multiply */ 2123 b->lvec = 0; 2124 b->Mvctx = 0; 2125 2126 /* stuff for MatGetRow() */ 2127 b->rowindices = 0; 2128 b->rowvalues = 0; 2129 b->getrowactive = PETSC_FALSE; 2130 2131 /* hash table stuff */ 2132 b->ht = 0; 2133 b->hd = 0; 2134 b->ht_size = 0; 2135 b->ht_flag = 0; 2136 b->ht_fact = 0; 2137 b->ht_total_ct = 0; 2138 b->ht_insert_ct = 0; 2139 2140 *A = B; 2141 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2142 if (flg) { 2143 double fact = 1.39; 2144 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2145 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr); 2146 if (fact <= 1.0) fact = 1.39; 2147 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2148 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2149 } 2150 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2151 "MatStoreValues_MPIBAIJ", 2152 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2153 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2154 "MatRetrieveValues_MPIBAIJ", 2155 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2156 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2157 "MatGetDiagonalBlock_MPIBAIJ", 2158 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 #undef __FUNC__ 2163 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2164 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2165 { 2166 Mat mat; 2167 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2168 int ierr, len=0, flg; 2169 2170 PetscFunctionBegin; 2171 *newmat = 0; 2172 PetscHeaderCreate(mat,_p_Mat,struct 
_MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2173 PLogObjectCreate(mat); 2174 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2175 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2176 mat->ops->destroy = MatDestroy_MPIBAIJ; 2177 mat->ops->view = MatView_MPIBAIJ; 2178 mat->factor = matin->factor; 2179 mat->assembled = PETSC_TRUE; 2180 mat->insertmode = NOT_SET_VALUES; 2181 2182 a->m = mat->m = oldmat->m; 2183 a->n = mat->n = oldmat->n; 2184 a->M = mat->M = oldmat->M; 2185 a->N = mat->N = oldmat->N; 2186 2187 a->bs = oldmat->bs; 2188 a->bs2 = oldmat->bs2; 2189 a->mbs = oldmat->mbs; 2190 a->nbs = oldmat->nbs; 2191 a->Mbs = oldmat->Mbs; 2192 a->Nbs = oldmat->Nbs; 2193 2194 a->rstart = oldmat->rstart; 2195 a->rend = oldmat->rend; 2196 a->cstart = oldmat->cstart; 2197 a->cend = oldmat->cend; 2198 a->size = oldmat->size; 2199 a->rank = oldmat->rank; 2200 a->donotstash = oldmat->donotstash; 2201 a->roworiented = oldmat->roworiented; 2202 a->rowindices = 0; 2203 a->rowvalues = 0; 2204 a->getrowactive = PETSC_FALSE; 2205 a->barray = 0; 2206 a->rstart_bs = oldmat->rstart_bs; 2207 a->rend_bs = oldmat->rend_bs; 2208 a->cstart_bs = oldmat->cstart_bs; 2209 a->cend_bs = oldmat->cend_bs; 2210 2211 /* hash table stuff */ 2212 a->ht = 0; 2213 a->hd = 0; 2214 a->ht_size = 0; 2215 a->ht_flag = oldmat->ht_flag; 2216 a->ht_fact = oldmat->ht_fact; 2217 a->ht_total_ct = 0; 2218 a->ht_insert_ct = 0; 2219 2220 2221 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2222 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2223 a->cowners = a->rowners + a->size + 2; 2224 a->rowners_bs = a->cowners + a->size + 2; 2225 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2226 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2227 ierr = 
MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2228 if (oldmat->colmap) { 2229 #if defined (PETSC_USE_CTABLE) 2230 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2231 #else 2232 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2233 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2234 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2235 #endif 2236 } else a->colmap = 0; 2237 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2238 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2239 PLogObjectMemory(mat,len*sizeof(int)); 2240 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2241 } else a->garray = 0; 2242 2243 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2244 PLogObjectParent(mat,a->lvec); 2245 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2246 2247 PLogObjectParent(mat,a->Mvctx); 2248 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2249 PLogObjectParent(mat,a->A); 2250 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2251 PLogObjectParent(mat,a->B); 2252 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2253 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2254 if (flg) { 2255 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2256 } 2257 *newmat = mat; 2258 PetscFunctionReturn(0); 2259 } 2260 2261 #include "sys.h" 2262 2263 #undef __FUNC__ 2264 #define __FUNC__ "MatLoad_MPIBAIJ" 2265 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2266 { 2267 Mat A; 2268 int i, nz, ierr, j,rstart, rend, fd; 2269 Scalar *vals,*buf; 2270 MPI_Comm comm = ((PetscObject)viewer)->comm; 2271 MPI_Status status; 2272 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2273 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2274 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2275 int 
*dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2276 int dcount,kmax,k,nzcount,tmp; 2277 2278 PetscFunctionBegin; 2279 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2280 2281 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2282 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2283 if (!rank) { 2284 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2285 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2286 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2287 if (header[3] < 0) { 2288 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2289 } 2290 } 2291 2292 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2293 M = header[1]; N = header[2]; 2294 2295 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2296 2297 /* 2298 This code adds extra rows to make sure the number of rows is 2299 divisible by the blocksize 2300 */ 2301 Mbs = M/bs; 2302 extra_rows = bs - M + bs*(Mbs); 2303 if (extra_rows == bs) extra_rows = 0; 2304 else Mbs++; 2305 if (extra_rows &&!rank) { 2306 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2307 } 2308 2309 /* determine ownership of all rows */ 2310 mbs = Mbs/size + ((Mbs % size) > rank); 2311 m = mbs * bs; 2312 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2313 browners = rowners + size + 1; 2314 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2315 rowners[0] = 0; 2316 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2317 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2318 rstart = rowners[rank]; 2319 rend = rowners[rank+1]; 2320 2321 /* distribute row lengths to all processors */ 2322 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2323 if (!rank) { 2324 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths); 
2325 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2326 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2327 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2328 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2329 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2330 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2331 } else { 2332 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2333 } 2334 2335 if (!rank) { 2336 /* calculate the number of nonzeros on each processor */ 2337 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2338 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2339 for ( i=0; i<size; i++ ) { 2340 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2341 procsnz[i] += rowlengths[j]; 2342 } 2343 } 2344 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2345 2346 /* determine max buffer needed and allocate it */ 2347 maxnz = 0; 2348 for ( i=0; i<size; i++ ) { 2349 maxnz = PetscMax(maxnz,procsnz[i]); 2350 } 2351 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2352 2353 /* read in my part of the matrix column indices */ 2354 nz = procsnz[0]; 2355 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2356 mycols = ibuf; 2357 if (size == 1) nz -= extra_rows; 2358 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2359 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2360 2361 /* read in every ones (except the last) and ship off */ 2362 for ( i=1; i<size-1; i++ ) { 2363 nz = procsnz[i]; 2364 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2365 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2366 } 2367 /* read in the stuff for the last proc */ 2368 if ( size != 1 ) { 2369 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2370 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2371 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2372 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2373 } 2374 ierr = PetscFree(cols);CHKERRQ(ierr); 2375 } else { 2376 /* determine buffer space needed for message */ 2377 nz = 0; 2378 for ( i=0; i<m; i++ ) { 2379 nz += locrowlens[i]; 2380 } 2381 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2382 mycols = ibuf; 2383 /* receive message of column indices*/ 2384 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2385 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2386 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2387 } 2388 2389 /* loop over local rows, determining number of off diagonal entries */ 2390 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens); 2391 odlens = dlens + (rend-rstart); 2392 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2393 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2394 masked1 = mask + Mbs; 2395 masked2 = masked1 + Mbs; 2396 rowcount = 0; nzcount = 0; 2397 for ( i=0; i<mbs; i++ ) { 2398 dcount = 0; 2399 odcount = 0; 2400 for ( j=0; j<bs; j++ ) { 2401 kmax = locrowlens[rowcount]; 2402 for ( k=0; k<kmax; k++ ) { 2403 tmp = mycols[nzcount++]/bs; 2404 if (!mask[tmp]) { 2405 mask[tmp] = 1; 2406 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2407 else masked1[dcount++] = tmp; 2408 } 2409 } 2410 rowcount++; 2411 } 2412 2413 dlens[i] = dcount; 2414 odlens[i] = odcount; 2415 2416 /* zero out the mask elements we set */ 2417 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2418 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2419 } 2420 2421 /* create our matrix */ 2422 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2423 A = *newmat; 2424 MatSetOption(A,MAT_COLUMNS_SORTED); 2425 2426 if (!rank) { 
2427 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2428 /* read in my part of the matrix numerical values */ 2429 nz = procsnz[0]; 2430 vals = buf; 2431 mycols = ibuf; 2432 if (size == 1) nz -= extra_rows; 2433 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2434 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2435 2436 /* insert into matrix */ 2437 jj = rstart*bs; 2438 for ( i=0; i<m; i++ ) { 2439 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2440 mycols += locrowlens[i]; 2441 vals += locrowlens[i]; 2442 jj++; 2443 } 2444 /* read in other processors (except the last one) and ship out */ 2445 for ( i=1; i<size-1; i++ ) { 2446 nz = procsnz[i]; 2447 vals = buf; 2448 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2449 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2450 } 2451 /* the last proc */ 2452 if ( size != 1 ){ 2453 nz = procsnz[i] - extra_rows; 2454 vals = buf; 2455 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2456 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2457 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2458 } 2459 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2460 } else { 2461 /* receive numeric values */ 2462 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2463 2464 /* receive message of values*/ 2465 vals = buf; 2466 mycols = ibuf; 2467 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2468 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2469 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2470 2471 /* insert into matrix */ 2472 jj = rstart*bs; 2473 for ( i=0; i<m; i++ ) { 2474 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2475 mycols += locrowlens[i]; 2476 vals += locrowlens[i]; 2477 jj++; 2478 } 2479 } 2480 ierr = 
PetscFree(locrowlens);CHKERRQ(ierr); 2481 ierr = PetscFree(buf);CHKERRQ(ierr); 2482 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2483 ierr = PetscFree(rowners);CHKERRQ(ierr); 2484 ierr = PetscFree(dlens);CHKERRQ(ierr); 2485 ierr = PetscFree(mask);CHKERRQ(ierr); 2486 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2487 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 PetscFunctionReturn(0); 2489 } 2490 2491 #undef __FUNC__ 2492 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2493 /*@ 2494 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2495 2496 Input Parameters: 2497 . mat - the matrix 2498 . fact - factor 2499 2500 Collective on Mat 2501 2502 Level: advanced 2503 2504 Notes: 2505 This can also be set by the command line option: -mat_use_hash_table fact 2506 2507 .keywords: matrix, hashtable, factor, HT 2508 2509 .seealso: MatSetOption() 2510 @*/ 2511 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2512 { 2513 Mat_MPIBAIJ *baij; 2514 2515 PetscFunctionBegin; 2516 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2517 if (mat->type != MATMPIBAIJ) { 2518 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2519 } 2520 baij = (Mat_MPIBAIJ*) mat->data; 2521 baij->ht_fact = fact; 2522 PetscFunctionReturn(0); 2523 } 2524