1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.179 1999/10/01 21:21:30 bsmith Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "mat.h" I*/ 6 #include "src/vec/vecimpl.h" 7 8 extern int MatSetUpMultiply_MPIBAIJ(Mat); 9 extern int DisAssemble_MPIBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqBAIJ(Mat); 18 19 EXTERN_C_BEGIN 20 #undef __FUNC__ 21 #define __FUNC__ "MatStoreValues_MPIBAIJ" 22 int MatStoreValues_MPIBAIJ(Mat mat) 23 { 24 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 25 int ierr; 26 27 PetscFunctionBegin; 28 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 29 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 30 PetscFunctionReturn(0); 31 } 32 EXTERN_C_END 33 34 EXTERN_C_BEGIN 35 #undef __FUNC__ 36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ" 37 int MatRetrieveValues_MPIBAIJ(Mat mat) 38 { 39 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 40 int ierr; 41 42 PetscFunctionBegin; 43 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 44 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 45 PetscFunctionReturn(0); 46 } 47 EXTERN_C_END 48 49 /* 50 Local utility routine that creates a mapping from the global column 51 number to the local number in the off-diagonal part of the local 52 storage of the matrix. This is done in a non scable way since the 53 length of colmap equals the global matrix length. 54 */ 55 #undef __FUNC__ 56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 57 static int CreateColmap_MPIBAIJ_Private(Mat mat) 58 { 59 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 60 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 61 int nbs = B->nbs,i,bs=B->bs,ierr; 62 63 PetscFunctionBegin; 64 #if defined (PETSC_USE_CTABLE) 65 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 66 for ( i=0; i<nbs; i++ ){ 67 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 68 } 69 #else 70 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 71 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 72 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 73 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 74 #endif 75 PetscFunctionReturn(0); 76 } 77 78 #define CHUNKSIZE 10 79 80 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 81 { \ 82 \ 83 brow = row/bs; \ 84 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 85 rmax = aimax[brow]; nrow = ailen[brow]; \ 86 bcol = col/bs; \ 87 ridx = row % bs; cidx = col % bs; \ 88 low = 0; high = nrow; \ 89 while (high-low > 3) { \ 90 t = (low+high)/2; \ 91 if (rp[t] > bcol) high = t; \ 92 else low = t; \ 93 } \ 94 for ( _i=low; _i<high; _i++ ) { \ 95 if (rp[_i] > bcol) break; \ 96 if (rp[_i] == bcol) { \ 97 bap = ap + bs2*_i + bs*cidx + ridx; \ 98 if (addv == ADD_VALUES) *bap += value; \ 99 else *bap = value; \ 100 goto a_noinsert; \ 101 } \ 102 } \ 103 if (a->nonew == 1) goto a_noinsert; \ 104 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 105 if (nrow >= rmax) { \ 106 /* there is no extra room in row, therefore enlarge */ \ 107 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 108 Scalar *new_a; \ 109 \ 110 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 111 \ 112 /* malloc new storage space */ \ 113 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 114 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 115 new_j = (int *) (new_a + bs2*new_nz); \ 116 new_i = new_j + new_nz; \ 117 \ 118 /* copy over old data into new slots */ \ 119 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 120 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 121 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 122 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 123 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 124 len*sizeof(int));CHKERRQ(ierr); \ 125 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 126 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 127 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 128 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 129 /* free up old matrix storage */ \ 130 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 131 if (!a->singlemalloc) { \ 132 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 133 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 134 } \ 135 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 136 a->singlemalloc = 1; \ 137 \ 138 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 139 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 140 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 141 a->maxnz += bs2*CHUNKSIZE; \ 142 a->reallocs++; \ 143 a->nz++; \ 144 } \ 145 N = nrow++ - 1; \ 146 /* shift up all the later entries in this row */ \ 147 for ( ii=N; ii>=_i; ii-- ) { \ 148 rp[ii+1] = rp[ii]; \ 149 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 150 } \ 151 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \ 152 rp[_i] = bcol; \ 153 ap[bs2*_i + bs*cidx + ridx] = value; \ 154 a_noinsert:; \ 155 ailen[brow] = nrow; \ 156 } 157 158 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 159 { \ 160 \ 161 brow = row/bs; \ 162 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 163 rmax = bimax[brow]; nrow = bilen[brow]; \ 164 bcol = col/bs; \ 165 ridx = row % bs; cidx = col % bs; \ 166 low = 0; high = nrow; \ 167 while (high-low > 3) { \ 168 t = (low+high)/2; \ 169 if (rp[t] > bcol) high = t; \ 170 else low = t; \ 171 } \ 172 for ( _i=low; _i<high; _i++ ) { \ 173 if (rp[_i] > bcol) break; \ 174 if (rp[_i] == bcol) { \ 175 bap = ap + bs2*_i + bs*cidx + ridx; \ 176 if (addv == ADD_VALUES) *bap += value; \ 177 else *bap = value; \ 178 goto b_noinsert; \ 179 } \ 180 } \ 181 if (b->nonew == 1) goto b_noinsert; \ 182 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 183 if (nrow >= rmax) { \ 184 /* there is no extra room in row, therefore enlarge */ \ 185 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 186 Scalar *new_a; \ 187 \ 188 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 189 \ 190 /* malloc new storage space */ \ 191 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 192 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 193 new_j = (int *) (new_a + bs2*new_nz); \ 194 new_i = new_j + new_nz; \ 195 \ 196 /* copy over old data into new slots */ \ 197 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 198 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 199 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 200 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 201 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 202 len*sizeof(int));CHKERRQ(ierr); \ 203 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \ 204 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 205 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 206 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \ 207 /* free up old matrix storage */ \ 208 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 209 if (!b->singlemalloc) { \ 210 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 211 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 212 } \ 213 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 214 b->singlemalloc = 1; \ 215 \ 216 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 217 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 218 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 219 b->maxnz += bs2*CHUNKSIZE; \ 220 b->reallocs++; \ 221 b->nz++; \ 222 } \ 223 N = nrow++ - 1; \ 224 /* shift up all the later entries in this row */ \ 225 for ( ii=N; ii>=_i; ii-- ) { \ 226 rp[ii+1] = rp[ii]; \ 227 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \ 228 } \ 229 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \ 230 rp[_i] = bcol; \ 231 ap[bs2*_i + bs*cidx + ridx] = value; \ 232 b_noinsert:; \ 233 bilen[brow] = nrow; \ 234 } 235 236 #undef __FUNC__ 237 #define __FUNC__ "MatSetValues_MPIBAIJ" 238 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 239 { 240 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 241 Scalar value; 242 int ierr,i,j,row,col; 243 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 244 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 245 int cend_orig=baij->cend_bs,bs=baij->bs; 246 247 /* Some Variables required in the macro */ 248 Mat A = baij->A; 249 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 250 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 251 Scalar *aa=a->a; 252 253 Mat B = baij->B; 254 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 255 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 256 Scalar *ba=b->a; 257 258 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 259 int low,high,t,ridx,cidx,bs2=a->bs2; 260 Scalar *ap,*bap; 261 262 PetscFunctionBegin; 263 for ( i=0; i<m; i++ ) { 264 if (im[i] < 0) continue; 265 #if defined(PETSC_USE_BOPT_g) 266 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 267 #endif 268 if (im[i] >= rstart_orig && im[i] < rend_orig) { 269 row = im[i] - rstart_orig; 270 for ( j=0; j<n; j++ ) { 271 if (in[j] >= cstart_orig && in[j] < cend_orig){ 272 col = in[j] - cstart_orig; 273 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 274 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 275 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 276 } else if (in[j] < 0) continue; 277 #if defined(PETSC_USE_BOPT_g) 278 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 279 #endif 280 else { 281 if (mat->was_assembled) { 282 if (!baij->colmap) { 283 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 284 } 285 #if defined (PETSC_USE_CTABLE) 286 ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr); 287 col = col - 1 + in[j]%bs; 288 #else 289 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 290 #endif 291 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 292 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 293 col = in[j]; 294 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 295 B = baij->B; 296 b = (Mat_SeqBAIJ *) (B)->data; 297 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 298 ba=b->a; 299 } 300 } else col = in[j]; 301 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 302 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 303 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 304 } 305 } 306 } else { 307 if ( !baij->donotstash) { 308 if (roworiented) { 309 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 310 } else { 311 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 312 } 313 } 314 } 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNC__ 320 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 321 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 322 { 323 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 324 Scalar *value,*barray=baij->barray; 325 int ierr,i,j,ii,jj,row,col; 326 int roworiented = baij->roworiented,rstart=baij->rstart ; 327 int rend=baij->rend,cstart=baij->cstart,stepval; 328 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 329 330 PetscFunctionBegin; 331 if(!barray) { 332 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray); 333 } 334 335 if (roworiented) { 336 stepval = (n-1)*bs; 337 } else { 338 stepval = (m-1)*bs; 339 } 340 for ( i=0; i<m; i++ ) { 341 if (im[i] < 0) continue; 342 #if defined(PETSC_USE_BOPT_g) 343 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 344 #endif 345 if (im[i] >= rstart && im[i] < rend) { 346 row = im[i] - rstart; 347 for ( j=0; j<n; j++ ) { 348 /* If NumCol = 1 then a copy is not required */ 349 if ((roworiented) && (n == 1)) { 350 barray = v + i*bs2; 351 } else if((!roworiented) && (m == 1)) { 352 barray = v + j*bs2; 353 } else { /* Here a copy is required */ 354 if (roworiented) { 355 value = v + i*(stepval+bs)*bs + j*bs; 356 } else { 357 value = v + j*(stepval+bs)*bs + i*bs; 358 } 359 for ( ii=0; ii<bs; ii++,value+=stepval ) { 360 for (jj=0; jj<bs; jj++ ) { 361 *barray++ = *value++; 362 } 363 } 364 barray -=bs2; 365 } 366 367 if (in[j] >= cstart && in[j] < cend){ 368 col = in[j] - cstart; 369 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 370 } 371 else if (in[j] < 0) continue; 372 #if defined(PETSC_USE_BOPT_g) 373 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 374 #endif 375 else { 376 if (mat->was_assembled) { 377 if (!baij->colmap) { 378 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 379 } 380 381 #if defined(PETSC_USE_BOPT_g) 382 #if defined (PETSC_USE_CTABLE) 383 { int data; 384 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 385 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 386 } 387 #else 388 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 389 #endif 390 #endif 391 #if defined (PETSC_USE_CTABLE) 392 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 393 col = (col - 1)/bs; 394 #else 395 col = (baij->colmap[in[j]] - 1)/bs; 396 #endif 397 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 398 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 399 col = in[j]; 400 } 401 } 402 else col = in[j]; 403 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 404 } 405 } 406 } else { 407 if (!baij->donotstash) { 408 if (roworiented) { 409 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 410 } else { 411 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 412 } 413 } 414 } 415 } 416 PetscFunctionReturn(0); 417 } 418 #define HASH_KEY 0.6180339887 419 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 420 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 421 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 422 #undef __FUNC__ 423 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 424 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 425 { 426 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 427 int ierr,i,j,row,col; 428 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 429 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 430 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 431 double tmp; 432 Scalar ** HD = baij->hd,value; 433 #if defined(PETSC_USE_BOPT_g) 434 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 435 #endif 436 437 PetscFunctionBegin; 438 439 for ( i=0; i<m; i++ ) { 440 #if defined(PETSC_USE_BOPT_g) 441 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 442 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 443 #endif 444 row = im[i]; 445 if (row >= rstart_orig && row < rend_orig) { 446 for ( j=0; j<n; j++ ) { 447 col = in[j]; 448 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 449 /* Look up into the Hash Table */ 450 key = (row/bs)*Nbs+(col/bs)+1; 451 h1 = HASH(size,key,tmp); 452 453 454 idx = h1; 455 #if defined(PETSC_USE_BOPT_g) 456 insert_ct++; 457 total_ct++; 458 if (HT[idx] != key) { 459 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 460 if (idx == size) { 461 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 462 if (idx == h1) { 463 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 464 } 465 } 466 } 467 #else 468 if (HT[idx] != key) { 469 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 470 if (idx == size) { 471 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 472 if (idx == h1) { 473 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 474 } 475 } 476 } 477 #endif 478 /* A HASH table entry is found, so insert the values at the correct address */ 479 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 480 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 481 } 482 } else { 483 if (!baij->donotstash) { 484 if (roworiented) { 485 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 486 } else { 487 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 488 } 489 } 490 } 491 } 492 #if defined(PETSC_USE_BOPT_g) 493 baij->ht_total_ct = total_ct; 494 baij->ht_insert_ct = insert_ct; 495 #endif 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNC__ 500 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 501 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 502 { 503 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 504 int ierr,i,j,ii,jj,row,col; 505 int roworiented = baij->roworiented,rstart=baij->rstart ; 506 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 507 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 508 double tmp; 509 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 510 #if defined(PETSC_USE_BOPT_g) 511 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 512 #endif 513 514 PetscFunctionBegin; 515 516 if (roworiented) { 517 stepval = (n-1)*bs; 518 } else { 519 stepval = (m-1)*bs; 520 } 521 for ( i=0; i<m; i++ ) { 522 #if defined(PETSC_USE_BOPT_g) 523 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 524 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 525 #endif 526 row = im[i]; 527 v_t = v + i*bs2; 528 if (row >= rstart && row < rend) { 529 for ( j=0; j<n; j++ ) { 530 col = in[j]; 531 532 /* Look up into the Hash Table */ 533 key = row*Nbs+col+1; 534 h1 = HASH(size,key,tmp); 535 536 idx = h1; 537 #if defined(PETSC_USE_BOPT_g) 538 total_ct++; 539 insert_ct++; 540 if (HT[idx] != key) { 541 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 542 if (idx == size) { 543 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 544 if (idx == h1) { 545 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 546 } 547 } 548 } 549 #else 550 if (HT[idx] != key) { 551 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 552 if (idx == size) { 553 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 554 if (idx == h1) { 555 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 556 } 557 } 558 } 559 #endif 560 baij_a = HD[idx]; 561 if (roworiented) { 562 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 563 /* value = v + (i*(stepval+bs)+j)*bs; */ 564 value = v_t; 565 v_t += bs; 566 if (addv == ADD_VALUES) { 567 for ( ii=0; ii<bs; ii++,value+=stepval) { 568 for ( jj=ii; jj<bs2; jj+=bs ) { 569 baij_a[jj] += *value++; 570 } 571 } 572 } else { 573 for ( ii=0; ii<bs; ii++,value+=stepval) { 574 for ( jj=ii; jj<bs2; jj+=bs ) { 575 baij_a[jj] = *value++; 576 } 577 } 578 } 579 } else { 580 value = v + j*(stepval+bs)*bs + i*bs; 581 if (addv == ADD_VALUES) { 582 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 583 for ( jj=0; jj<bs; jj++ ) { 584 baij_a[jj] += *value++; 585 } 586 } 587 } else { 588 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 589 for ( jj=0; jj<bs; jj++ ) { 590 baij_a[jj] = *value++; 591 } 592 } 593 } 594 } 595 } 596 } else { 597 if (!baij->donotstash) { 598 if (roworiented) { 599 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 600 } else { 601 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 602 } 603 } 604 } 605 } 606 #if defined(PETSC_USE_BOPT_g) 607 baij->ht_total_ct = total_ct; 608 baij->ht_insert_ct = insert_ct; 609 #endif 610 PetscFunctionReturn(0); 611 } 612 613 #undef __FUNC__ 614 #define __FUNC__ "MatGetValues_MPIBAIJ" 615 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 616 { 617 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 618 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 619 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data; 620 621 PetscFunctionBegin; 622 for ( i=0; i<m; i++ ) { 623 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 624 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for ( j=0; j<n; j++ ) { 628 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 629 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 630 if (idxn[j] >= bscstart && idxn[j] < bscend){ 631 col = idxn[j] - bscstart; 632 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } else { 634 if (!baij->colmap) { 635 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 636 } 637 #if defined (PETSC_USE_CTABLE) 638 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 639 data --; 640 #else 641 data = baij->colmap[idxn[j]/bs]-1; 642 #endif 643 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 644 else { 645 col = data + idxn[j]%bs; 646 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } 648 } 649 } 650 } else { 651 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 652 } 653 } 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNC__ 658 #define __FUNC__ "MatNorm_MPIBAIJ" 659 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 660 { 661 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 662 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 663 int ierr, i,bs2=baij->bs2; 664 double sum = 0.0; 665 Scalar *v; 666 667 PetscFunctionBegin; 668 if (baij->size == 1) { 669 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 670 } else { 671 if (type == NORM_FROBENIUS) { 672 v = amat->a; 673 for (i=0; i<amat->nz*bs2; i++ ) { 674 #if defined(PETSC_USE_COMPLEX) 675 sum += PetscReal(PetscConj(*v)*(*v)); v++; 676 #else 677 sum += (*v)*(*v); v++; 678 #endif 679 } 680 v = bmat->a; 681 for (i=0; i<bmat->nz*bs2; i++ ) { 682 #if defined(PETSC_USE_COMPLEX) 683 sum += PetscReal(PetscConj(*v)*(*v)); v++; 684 #else 685 sum += (*v)*(*v); v++; 686 #endif 687 } 688 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 689 *norm = sqrt(*norm); 690 } else { 691 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 692 } 693 } 694 PetscFunctionReturn(0); 695 } 696 697 698 /* 699 Creates the hash table, and sets the table 700 This table is created only once. 701 If new entried need to be added to the matrix 702 then the hash table has to be destroyed and 703 recreated. 704 */ 705 #undef __FUNC__ 706 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 707 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 708 { 709 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 710 Mat A = baij->A, B=baij->B; 711 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 712 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 713 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 714 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 715 int *HT,key; 716 Scalar **HD; 717 double tmp; 718 #if defined(PETSC_USE_BOPT_g) 719 int ct=0,max=0; 720 #endif 721 722 PetscFunctionBegin; 723 baij->ht_size=(int)(factor*nz); 724 size = baij->ht_size; 725 726 if (baij->ht) { 727 PetscFunctionReturn(0); 728 } 729 730 /* Allocate Memory for Hash Table */ 731 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd); 732 baij->ht = (int*)(baij->hd + size); 733 HD = baij->hd; 734 HT = baij->ht; 735 736 737 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 738 739 740 /* Loop Over A */ 741 for ( i=0; i<a->mbs; i++ ) { 742 for ( j=ai[i]; j<ai[i+1]; j++ ) { 743 row = i+rstart; 744 col = aj[j]+cstart; 745 746 key = row*Nbs + col + 1; 747 h1 = HASH(size,key,tmp); 748 for ( k=0; k<size; k++ ){ 749 if (HT[(h1+k)%size] == 0.0) { 750 HT[(h1+k)%size] = key; 751 HD[(h1+k)%size] = a->a + j*bs2; 752 break; 753 #if defined(PETSC_USE_BOPT_g) 754 } else { 755 ct++; 756 #endif 757 } 758 } 759 #if defined(PETSC_USE_BOPT_g) 760 if (k> max) max = k; 761 #endif 762 } 763 } 764 /* Loop Over B */ 765 for ( i=0; i<b->mbs; i++ ) { 766 for ( j=bi[i]; j<bi[i+1]; j++ ) { 767 row = i+rstart; 768 col = garray[bj[j]]; 769 key = row*Nbs + col + 1; 770 h1 = HASH(size,key,tmp); 771 for ( k=0; k<size; k++ ){ 772 if (HT[(h1+k)%size] == 0.0) { 773 HT[(h1+k)%size] = key; 774 HD[(h1+k)%size] = b->a + j*bs2; 775 break; 776 #if defined(PETSC_USE_BOPT_g) 777 } else { 778 ct++; 779 #endif 780 } 781 } 782 #if defined(PETSC_USE_BOPT_g) 783 if (k> max) max = k; 784 #endif 785 } 786 } 787 788 /* Print Summary */ 789 #if defined(PETSC_USE_BOPT_g) 790 for ( i=0,j=0; i<size; i++) 791 if (HT[i]) {j++;} 792 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 793 (j== 0)? 0.0:((double)(ct+j))/j,max); 794 #endif 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 800 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 801 { 802 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 803 int ierr,nstash,reallocs; 804 InsertMode addv; 805 806 PetscFunctionBegin; 807 if (baij->donotstash) { 808 PetscFunctionReturn(0); 809 } 810 811 /* make sure all processors are either in INSERTMODE or ADDMODE */ 812 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 813 if (addv == (ADD_VALUES|INSERT_VALUES)) { 814 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 815 } 816 mat->insertmode = addv; /* in case this processor had no cache */ 817 818 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 819 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 822 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 823 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNC__ 828 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 829 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 830 { 831 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 832 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 833 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 834 int *row,*col,other_disassembled,r1,r2,r3; 835 Scalar *val; 836 InsertMode addv = mat->insertmode; 837 838 PetscFunctionBegin; 839 if (!baij->donotstash) { 840 while (1) { 841 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 842 if (!flg) break; 843 844 for ( i=0; i<n; ) { 845 /* Now identify the consecutive vals belonging to the same row */ 846 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 847 if (j < n) ncols = j-i; 848 else ncols = n-i; 849 /* Now assemble all these values with a single function call */ 850 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 851 i = j; 852 } 853 } 854 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 855 /* Now process the block-stash. Since the values are stashed column-oriented, 856 set the roworiented flag to column oriented, and after MatSetValues() 857 restore the original flags */ 858 r1 = baij->roworiented; 859 r2 = a->roworiented; 860 r3 = b->roworiented; 861 baij->roworiented = 0; 862 a->roworiented = 0; 863 b->roworiented = 0; 864 while (1) { 865 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 866 if (!flg) break; 867 868 for ( i=0; i<n; ) { 869 /* Now identify the consecutive vals belonging to the same row */ 870 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 871 if (j < n) ncols = j-i; 872 else ncols = n-i; 873 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 874 i = j; 875 } 876 } 877 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 878 baij->roworiented = r1; 879 a->roworiented = r2; 880 b->roworiented = r3; 881 } 882 883 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 884 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 885 886 /* determine if any processor has disassembled, if so we must 887 also disassemble ourselfs, in order that we may reassemble. */ 888 /* 889 if nonzero structure of submatrix B cannot change then we know that 890 no processor disassembled thus we can skip this stuff 891 */ 892 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 893 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 894 if (mat->was_assembled && !other_disassembled) { 895 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 896 } 897 } 898 899 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 900 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 901 } 902 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 903 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 904 905 #if defined(PETSC_USE_BOPT_g) 906 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 907 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 908 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 909 baij->ht_total_ct = 0; 910 baij->ht_insert_ct = 0; 911 } 912 #endif 913 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 914 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 915 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 916 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 917 } 918 919 if (baij->rowvalues) { 920 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 921 baij->rowvalues = 0; 922 } 923 PetscFunctionReturn(0); 924 } 925 926 /* 927 #undef __FUNC__ 928 #define __FUNC__ "MatView_MPIBAIJ_Binary" 929 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 930 { 931 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 932 int ierr; 933 934 PetscFunctionBegin; 935 if (baij->size == 1) { 936 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 937 } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 938 PetscFunctionReturn(0); 939 } 940 */ 941 942 #undef __FUNC__ 943 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 944 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 945 { 946 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 947 int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 948 FILE *fd; 949 int isascii,isdraw; 950 951 PetscFunctionBegin; 952 isascii = PetscTypeCompare(viewer,ASCII_VIEWER); 953 isdraw = PetscTypeCompare(viewer,DRAW_VIEWER); 954 if (isascii) { 955 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 956 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 957 MatInfo info; 958 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 959 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 960 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 961 ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr); 962 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 963 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 964 baij->bs,(int)info.memory); 965 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 966 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 967 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 968 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 969 fflush(fd); 970 ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr); 971 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 974 ierr = PetscPrintf(mat->comm," block size is %d\n",bs);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 } 978 979 if (isdraw) { 980 Draw draw; 981 PetscTruth isnull; 982 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 983 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 984 } 985 986 if (size == 1) { 987 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 988 } else { 989 /* assemble the entire matrix onto first processor. */ 990 Mat A; 991 Mat_SeqBAIJ *Aloc; 992 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 993 int mbs = baij->mbs; 994 Scalar *a; 995 996 if (!rank) { 997 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 998 } else { 999 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1000 } 1001 PLogObjectParent(mat,A); 1002 1003 /* copy over the A part */ 1004 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1005 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1006 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1007 1008 for ( i=0; i<mbs; i++ ) { 1009 rvals[0] = bs*(baij->rstart + i); 1010 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1011 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1012 col = (baij->cstart+aj[j])*bs; 1013 for (k=0; k<bs; k++ ) { 1014 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1015 col++; a += bs; 1016 } 1017 } 1018 } 1019 /* copy over the B part */ 1020 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1021 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1022 for ( i=0; i<mbs; i++ ) { 1023 rvals[0] = bs*(baij->rstart + i); 1024 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1025 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1026 col = baij->garray[aj[j]]*bs; 1027 for (k=0; k<bs; k++ ) { 1028 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1029 col++; a += bs; 1030 } 1031 } 1032 } 1033 ierr = PetscFree(rvals);CHKERRQ(ierr); 1034 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1035 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1036 /* 1037 Everyone has to call to draw the matrix since the graphics waits are 1038 synchronized across all processors that share the Draw object 1039 */ 1040 if (!rank || PetscTypeCompare(viewer,DRAW_VIEWER)) { 1041 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 1042 } 1043 ierr = MatDestroy(A);CHKERRQ(ierr); 1044 } 1045 PetscFunctionReturn(0); 1046 } 1047 1048 1049 1050 #undef __FUNC__ 1051 #define __FUNC__ "MatView_MPIBAIJ" 1052 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1053 { 1054 int ierr; 1055 int isascii,isdraw,issocket,isbinary; 1056 1057 PetscFunctionBegin; 1058 isascii = PetscTypeCompare(viewer,ASCII_VIEWER); 1059 isdraw = PetscTypeCompare(viewer,DRAW_VIEWER); 1060 issocket = PetscTypeCompare(viewer,SOCKET_VIEWER); 1061 isbinary = PetscTypeCompare(viewer,BINARY_VIEWER); 1062 if (isascii || isdraw || issocket || isbinary) { 1063 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1064 } else { 1065 SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1066 } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNC__ 1071 #define __FUNC__ "MatDestroy_MPIBAIJ" 1072 int MatDestroy_MPIBAIJ(Mat mat) 1073 { 1074 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1075 int ierr; 1076 1077 PetscFunctionBegin; 1078 1079 if (mat->mapping) { 1080 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1081 } 1082 if (mat->bmapping) { 1083 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1084 } 1085 if (mat->rmap) { 1086 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1087 } 1088 if (mat->cmap) { 1089 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1090 } 1091 #if defined(PETSC_USE_LOG) 1092 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1093 #endif 1094 1095 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1096 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1097 1098 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1099 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1100 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1101 #if defined (PETSC_USE_CTABLE) 1102 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1103 #else 1104 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1105 #endif 1106 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1107 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1108 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1109 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1110 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1111 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1112 ierr = PetscFree(baij);CHKERRQ(ierr); 1113 PLogObjectDestroy(mat); 1114 PetscHeaderDestroy(mat); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNC__ 1119 #define __FUNC__ "MatMult_MPIBAIJ" 1120 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1121 { 1122 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1123 int ierr, nt; 1124 1125 PetscFunctionBegin; 1126 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1127 if (nt != a->n) { 1128 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1129 } 1130 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1131 if (nt != a->m) { 1132 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1133 } 1134 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1135 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1136 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1137 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1138 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNC__ 1143 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1144 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1145 { 1146 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1147 int ierr; 1148 1149 PetscFunctionBegin; 1150 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1151 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1152 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1153 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNC__ 1158 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1159 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1160 { 1161 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1162 int ierr; 1163 1164 PetscFunctionBegin; 1165 /* do nondiagonal part */ 1166 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1167 /* send it on its way */ 1168 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1169 /* do local part */ 1170 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1171 /* receive remote parts: note this assumes the values are not actually */ 1172 /* inserted in yy until the next line, which is true for my implementation*/ 1173 /* but is not perhaps always true. */ 1174 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNC__ 1179 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1180 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1181 { 1182 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1183 int ierr; 1184 1185 PetscFunctionBegin; 1186 /* do nondiagonal part */ 1187 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1188 /* send it on its way */ 1189 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1190 /* do local part */ 1191 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1192 /* receive remote parts: note this assumes the values are not actually */ 1193 /* inserted in yy until the next line, which is true for my implementation*/ 1194 /* but is not perhaps always true. */ 1195 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 /* 1200 This only works correctly for square matrices where the subblock A->A is the 1201 diagonal block 1202 */ 1203 #undef __FUNC__ 1204 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1205 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1206 { 1207 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1208 int ierr; 1209 1210 PetscFunctionBegin; 1211 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1212 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNC__ 1217 #define __FUNC__ "MatScale_MPIBAIJ" 1218 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1219 { 1220 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1221 int ierr; 1222 1223 PetscFunctionBegin; 1224 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1225 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNC__ 1230 #define __FUNC__ "MatGetSize_MPIBAIJ" 1231 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1232 { 1233 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1234 1235 PetscFunctionBegin; 1236 if (m) *m = mat->M; 1237 if (n) *n = mat->N; 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNC__ 1242 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1243 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1244 { 1245 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1246 1247 PetscFunctionBegin; 1248 *m = mat->m; *n = mat->n; 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNC__ 1253 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1254 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1255 { 1256 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1257 1258 PetscFunctionBegin; 1259 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNC__ 1264 #define __FUNC__ "MatGetRow_MPIBAIJ" 1265 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1266 { 1267 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1268 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1269 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1270 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1271 int *cmap, *idx_p,cstart = mat->cstart; 1272 1273 PetscFunctionBegin; 1274 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1275 mat->getrowactive = PETSC_TRUE; 1276 1277 if (!mat->rowvalues && (idx || v)) { 1278 /* 1279 allocate enough space to hold information from the longest row. 1280 */ 1281 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1282 int max = 1,mbs = mat->mbs,tmp; 1283 for ( i=0; i<mbs; i++ ) { 1284 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1285 if (max < tmp) { max = tmp; } 1286 } 1287 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1288 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1289 } 1290 1291 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1292 lrow = row - brstart; 1293 1294 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1295 if (!v) {pvA = 0; pvB = 0;} 1296 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1297 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1298 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1299 nztot = nzA + nzB; 1300 1301 cmap = mat->garray; 1302 if (v || idx) { 1303 if (nztot) { 1304 /* Sort by increasing column numbers, assuming A and B already sorted */ 1305 int imark = -1; 1306 if (v) { 1307 *v = v_p = mat->rowvalues; 1308 for ( i=0; i<nzB; i++ ) { 1309 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1310 else break; 1311 } 1312 imark = i; 1313 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1314 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1315 } 1316 if (idx) { 1317 *idx = idx_p = mat->rowindices; 1318 if (imark > -1) { 1319 for ( i=0; i<imark; i++ ) { 1320 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1321 } 1322 } else { 1323 for ( i=0; i<nzB; i++ ) { 1324 if (cmap[cworkB[i]/bs] < cstart) 1325 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1326 else break; 1327 } 1328 imark = i; 1329 } 1330 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1331 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1332 } 1333 } else { 1334 if (idx) *idx = 0; 1335 if (v) *v = 0; 1336 } 1337 } 1338 *nz = nztot; 1339 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1340 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNC__ 1345 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1346 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1347 { 1348 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1349 1350 PetscFunctionBegin; 1351 if (baij->getrowactive == PETSC_FALSE) { 1352 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1353 } 1354 baij->getrowactive = PETSC_FALSE; 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNC__ 1359 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1360 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1361 { 1362 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1363 1364 PetscFunctionBegin; 1365 *bs = baij->bs; 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNC__ 1370 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1371 int MatZeroEntries_MPIBAIJ(Mat A) 1372 { 1373 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1374 int ierr; 1375 1376 PetscFunctionBegin; 1377 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1378 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 1382 #undef __FUNC__ 1383 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1384 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1385 { 1386 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1387 Mat A = a->A, B = a->B; 1388 int ierr; 1389 double isend[5], irecv[5]; 1390 1391 PetscFunctionBegin; 1392 info->block_size = (double)a->bs; 1393 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1394 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1395 isend[3] = info->memory; isend[4] = info->mallocs; 1396 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1397 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1398 isend[3] += info->memory; isend[4] += info->mallocs; 1399 if (flag == MAT_LOCAL) { 1400 info->nz_used = isend[0]; 1401 info->nz_allocated = isend[1]; 1402 info->nz_unneeded = isend[2]; 1403 info->memory = isend[3]; 1404 info->mallocs = isend[4]; 1405 } else if (flag == MAT_GLOBAL_MAX) { 1406 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1407 info->nz_used = irecv[0]; 1408 info->nz_allocated = irecv[1]; 1409 info->nz_unneeded = irecv[2]; 1410 info->memory = irecv[3]; 1411 info->mallocs = irecv[4]; 1412 } else if (flag == MAT_GLOBAL_SUM) { 1413 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1414 info->nz_used = irecv[0]; 1415 info->nz_allocated = irecv[1]; 1416 info->nz_unneeded = irecv[2]; 1417 info->memory = irecv[3]; 1418 info->mallocs = irecv[4]; 1419 } else { 1420 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1421 } 1422 info->rows_global = (double)a->M; 1423 info->columns_global = (double)a->N; 1424 info->rows_local = (double)a->m; 1425 info->columns_local = (double)a->N; 1426 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1427 info->fill_ratio_needed = 0; 1428 info->factor_mallocs = 0; 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNC__ 1433 #define __FUNC__ "MatSetOption_MPIBAIJ" 1434 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1435 { 1436 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1437 int ierr; 1438 1439 PetscFunctionBegin; 1440 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1441 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1442 op == MAT_COLUMNS_UNSORTED || 1443 op == MAT_COLUMNS_SORTED || 1444 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1445 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1446 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1447 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1448 } else if (op == MAT_ROW_ORIENTED) { 1449 a->roworiented = 1; 1450 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1451 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1452 } else if (op == MAT_ROWS_SORTED || 1453 op == MAT_ROWS_UNSORTED || 1454 op == MAT_SYMMETRIC || 1455 op == MAT_STRUCTURALLY_SYMMETRIC || 1456 op == MAT_YES_NEW_DIAGONALS || 1457 op == MAT_USE_HASH_TABLE) { 1458 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1459 } else if (op == MAT_COLUMN_ORIENTED) { 1460 a->roworiented = 0; 1461 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1462 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1463 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1464 a->donotstash = 1; 1465 } else if (op == MAT_NO_NEW_DIAGONALS) { 1466 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1467 } else if (op == MAT_USE_HASH_TABLE) { 1468 a->ht_flag = 1; 1469 } else { 1470 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1471 } 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNC__ 1476 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1477 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1478 { 1479 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1480 Mat_SeqBAIJ *Aloc; 1481 Mat B; 1482 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1483 int bs=baij->bs,mbs=baij->mbs; 1484 Scalar *a; 1485 1486 PetscFunctionBegin; 1487 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1488 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1489 CHKERRQ(ierr); 1490 1491 /* copy over the A part */ 1492 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1493 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1494 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1495 1496 for ( i=0; i<mbs; i++ ) { 1497 rvals[0] = bs*(baij->rstart + i); 1498 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1499 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1500 col = (baij->cstart+aj[j])*bs; 1501 for (k=0; k<bs; k++ ) { 1502 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1503 col++; a += bs; 1504 } 1505 } 1506 } 1507 /* copy over the B part */ 1508 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1509 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1510 for ( i=0; i<mbs; i++ ) { 1511 rvals[0] = bs*(baij->rstart + i); 1512 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1513 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1514 col = baij->garray[aj[j]]*bs; 1515 for (k=0; k<bs; k++ ) { 1516 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1517 col++; a += bs; 1518 } 1519 } 1520 } 1521 ierr = PetscFree(rvals);CHKERRQ(ierr); 1522 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1523 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1524 1525 if (matout != PETSC_NULL) { 1526 *matout = B; 1527 } else { 1528 PetscOps *Abops; 1529 MatOps Aops; 1530 1531 /* This isn't really an in-place transpose .... but free data structures from baij */ 1532 ierr = PetscFree(baij->rowners); CHKERRQ(ierr); 1533 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1534 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1535 #if defined (PETSC_USE_CTABLE) 1536 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1537 #else 1538 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1539 #endif 1540 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1541 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1542 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1543 ierr = PetscFree(baij);CHKERRQ(ierr); 1544 1545 /* 1546 This is horrible, horrible code. We need to keep the 1547 A pointers for the bops and ops but copy everything 1548 else from C. 1549 */ 1550 Abops = A->bops; 1551 Aops = A->ops; 1552 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1553 A->bops = Abops; 1554 A->ops = Aops; 1555 1556 PetscHeaderDestroy(B); 1557 } 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNC__ 1562 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1563 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1564 { 1565 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1566 Mat a = baij->A, b = baij->B; 1567 int ierr,s1,s2,s3; 1568 1569 PetscFunctionBegin; 1570 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1571 if (rr) { 1572 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1573 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1574 /* Overlap communication with computation. */ 1575 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1576 } 1577 if (ll) { 1578 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1579 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1580 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1581 } 1582 /* scale the diagonal block */ 1583 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1584 1585 if (rr) { 1586 /* Do a scatter end and then right scale the off-diagonal block */ 1587 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1588 ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr); 1589 } 1590 1591 PetscFunctionReturn(0); 1592 } 1593 1594 #undef __FUNC__ 1595 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1596 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1597 { 1598 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1599 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1600 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1601 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1602 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1603 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1604 MPI_Comm comm = A->comm; 1605 MPI_Request *send_waits,*recv_waits; 1606 MPI_Status recv_status,*send_status; 1607 IS istmp; 1608 1609 PetscFunctionBegin; 1610 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1611 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1612 1613 /* first count number of contributors to each processor */ 1614 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 1615 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1616 procs = nprocs + size; 1617 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1618 for ( i=0; i<N; i++ ) { 1619 idx = rows[i]; 1620 found = 0; 1621 for ( j=0; j<size; j++ ) { 1622 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1623 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1624 } 1625 } 1626 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1627 } 1628 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1629 1630 /* inform other processors of number of messages and max length*/ 1631 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 1632 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1633 nrecvs = work[rank]; 1634 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1635 nmax = work[rank]; 1636 ierr = PetscFree(work);CHKERRQ(ierr); 1637 1638 /* post receives: */ 1639 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1640 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1641 for ( i=0; i<nrecvs; i++ ) { 1642 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1643 } 1644 1645 /* do sends: 1646 1) starts[i] gives the starting index in svalues for stuff going to 1647 the ith processor 1648 */ 1649 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 1650 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1651 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 1652 starts[0] = 0; 1653 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1654 for ( i=0; i<N; i++ ) { 1655 svalues[starts[owner[i]]++] = rows[i]; 1656 } 1657 ISRestoreIndices(is,&rows); 1658 1659 starts[0] = 0; 1660 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1661 count = 0; 1662 for ( i=0; i<size; i++ ) { 1663 if (procs[i]) { 1664 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1665 } 1666 } 1667 ierr = PetscFree(starts);CHKERRQ(ierr); 1668 1669 base = owners[rank]*bs; 1670 1671 /* wait on receives */ 1672 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 1673 source = lens + nrecvs; 1674 count = nrecvs; slen = 0; 1675 while (count) { 1676 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1677 /* unpack receives into our local space */ 1678 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1679 source[imdex] = recv_status.MPI_SOURCE; 1680 lens[imdex] = n; 1681 slen += n; 1682 count--; 1683 } 1684 ierr = PetscFree(recv_waits); CHKERRQ(ierr); 1685 1686 /* move the data into the send scatter */ 1687 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 1688 count = 0; 1689 for ( i=0; i<nrecvs; i++ ) { 1690 values = rvalues + i*nmax; 1691 for ( j=0; j<lens[i]; j++ ) { 1692 lrows[count++] = values[j] - base; 1693 } 1694 } 1695 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1696 ierr = PetscFree(lens);CHKERRQ(ierr); 1697 ierr = PetscFree(owner);CHKERRQ(ierr); 1698 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1699 1700 /* actually zap the local rows */ 1701 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1702 PLogObjectParent(A,istmp); 1703 1704 /* 1705 Zero the required rows. If the "diagonal block" of the matrix 1706 is square and the user wishes to set the diagonal we use seperate 1707 code so that MatSetValues() is not called for each diagonal allocating 1708 new memory, thus calling lots of mallocs and slowing things down. 1709 1710 Contributed by: Mathew Knepley 1711 */ 1712 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1713 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 1714 if (diag && (l->A->M == l->A->N)) { 1715 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 1716 } else if (diag) { 1717 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1718 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1719 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1720 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1721 } 1722 for ( i = 0; i < slen; i++ ) { 1723 row = lrows[i] + rstart_bs; 1724 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1725 } 1726 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1727 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1728 } else { 1729 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 1730 } 1731 1732 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1733 ierr = PetscFree(lrows);CHKERRQ(ierr); 1734 1735 /* wait on sends */ 1736 if (nsends) { 1737 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1738 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1739 ierr = PetscFree(send_status);CHKERRQ(ierr); 1740 } 1741 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1742 ierr = PetscFree(svalues);CHKERRQ(ierr); 1743 1744 PetscFunctionReturn(0); 1745 } 1746 1747 #undef __FUNC__ 1748 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1749 int MatPrintHelp_MPIBAIJ(Mat A) 1750 { 1751 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1752 MPI_Comm comm = A->comm; 1753 static int called = 0; 1754 int ierr; 1755 1756 PetscFunctionBegin; 1757 if (!a->rank) { 1758 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1759 } 1760 if (called) {PetscFunctionReturn(0);} else called = 1; 1761 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1762 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #undef __FUNC__ 1767 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1768 int MatSetUnfactored_MPIBAIJ(Mat A) 1769 { 1770 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1771 int ierr; 1772 1773 PetscFunctionBegin; 1774 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1779 1780 #undef __FUNC__ 1781 #define __FUNC__ "MatEqual_MPIBAIJ" 1782 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag) 1783 { 1784 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data; 1785 Mat a, b, c, d; 1786 PetscTruth flg; 1787 int ierr; 1788 1789 PetscFunctionBegin; 1790 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1791 a = matA->A; b = matA->B; 1792 c = matB->A; d = matB->B; 1793 1794 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1795 if (flg == PETSC_TRUE) { 1796 ierr = MatEqual(b, d, &flg);CHKERRQ(ierr); 1797 } 1798 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1799 PetscFunctionReturn(0); 1800 } 1801 1802 /* -------------------------------------------------------------------*/ 1803 static struct _MatOps MatOps_Values = { 1804 MatSetValues_MPIBAIJ, 1805 MatGetRow_MPIBAIJ, 1806 MatRestoreRow_MPIBAIJ, 1807 MatMult_MPIBAIJ, 1808 MatMultAdd_MPIBAIJ, 1809 MatMultTrans_MPIBAIJ, 1810 MatMultTransAdd_MPIBAIJ, 1811 0, 1812 0, 1813 0, 1814 0, 1815 0, 1816 0, 1817 0, 1818 MatTranspose_MPIBAIJ, 1819 MatGetInfo_MPIBAIJ, 1820 MatEqual_MPIBAIJ, 1821 MatGetDiagonal_MPIBAIJ, 1822 MatDiagonalScale_MPIBAIJ, 1823 MatNorm_MPIBAIJ, 1824 MatAssemblyBegin_MPIBAIJ, 1825 MatAssemblyEnd_MPIBAIJ, 1826 0, 1827 MatSetOption_MPIBAIJ, 1828 MatZeroEntries_MPIBAIJ, 1829 MatZeroRows_MPIBAIJ, 1830 0, 1831 0, 1832 0, 1833 0, 1834 MatGetSize_MPIBAIJ, 1835 MatGetLocalSize_MPIBAIJ, 1836 MatGetOwnershipRange_MPIBAIJ, 1837 0, 1838 0, 1839 0, 1840 0, 1841 MatDuplicate_MPIBAIJ, 1842 0, 1843 0, 1844 0, 1845 0, 1846 0, 1847 MatGetSubMatrices_MPIBAIJ, 1848 MatIncreaseOverlap_MPIBAIJ, 1849 MatGetValues_MPIBAIJ, 1850 0, 1851 MatPrintHelp_MPIBAIJ, 1852 MatScale_MPIBAIJ, 1853 0, 1854 0, 1855 0, 1856 MatGetBlockSize_MPIBAIJ, 1857 0, 1858 0, 1859 0, 1860 0, 1861 0, 1862 0, 1863 MatSetUnfactored_MPIBAIJ, 1864 0, 1865 MatSetValuesBlocked_MPIBAIJ, 1866 0, 1867 0, 1868 0, 1869 MatGetMaps_Petsc}; 1870 1871 1872 EXTERN_C_BEGIN 1873 #undef __FUNC__ 1874 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ" 1875 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1876 { 1877 PetscFunctionBegin; 1878 *a = ((Mat_MPIBAIJ *)A->data)->A; 1879 *iscopy = PETSC_FALSE; 1880 PetscFunctionReturn(0); 1881 } 1882 EXTERN_C_END 1883 1884 #undef __FUNC__ 1885 #define __FUNC__ "MatCreateMPIBAIJ" 1886 /*@C 1887 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1888 (block compressed row). For good matrix assembly performance 1889 the user should preallocate the matrix storage by setting the parameters 1890 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1891 performance can be increased by more than a factor of 50. 1892 1893 Collective on MPI_Comm 1894 1895 Input Parameters: 1896 + comm - MPI communicator 1897 . bs - size of blockk 1898 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1899 This value should be the same as the local size used in creating the 1900 y vector for the matrix-vector product y = Ax. 1901 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1902 This value should be the same as the local size used in creating the 1903 x vector for the matrix-vector product y = Ax. 1904 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1905 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1906 . d_nz - number of block nonzeros per block row in diagonal portion of local 1907 submatrix (same for all local rows) 1908 . d_nnz - array containing the number of block nonzeros in the various block rows 1909 of the in diagonal portion of the local (possibly different for each block 1910 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1911 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1912 submatrix (same for all local rows). 1913 - o_nnz - array containing the number of nonzeros in the various block rows of the 1914 off-diagonal portion of the local submatrix (possibly different for 1915 each block row) or PETSC_NULL. 1916 1917 Output Parameter: 1918 . A - the matrix 1919 1920 Options Database Keys: 1921 . -mat_no_unroll - uses code that does not unroll the loops in the 1922 block calculations (much slower) 1923 . -mat_block_size - size of the blocks to use 1924 . -mat_mpi - use the parallel matrix data structures even on one processor 1925 (defaults to using SeqBAIJ format on one processor) 1926 1927 Notes: 1928 The user MUST specify either the local or global matrix dimensions 1929 (possibly both). 1930 1931 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1932 than it must be used on all processors that share the object for that argument. 1933 1934 Storage Information: 1935 For a square global matrix we define each processor's diagonal portion 1936 to be its local rows and the corresponding columns (a square submatrix); 1937 each processor's off-diagonal portion encompasses the remainder of the 1938 local matrix (a rectangular submatrix). 1939 1940 The user can specify preallocated storage for the diagonal part of 1941 the local submatrix with either d_nz or d_nnz (not both). Set 1942 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1943 memory allocation. Likewise, specify preallocated storage for the 1944 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1945 1946 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1947 the figure below we depict these three local rows and all columns (0-11). 1948 1949 .vb 1950 0 1 2 3 4 5 6 7 8 9 10 11 1951 ------------------- 1952 row 3 | o o o d d d o o o o o o 1953 row 4 | o o o d d d o o o o o o 1954 row 5 | o o o d d d o o o o o o 1955 ------------------- 1956 .ve 1957 1958 Thus, any entries in the d locations are stored in the d (diagonal) 1959 submatrix, and any entries in the o locations are stored in the 1960 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1961 stored simply in the MATSEQBAIJ format for compressed row storage. 1962 1963 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1964 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1965 In general, for PDE problems in which most nonzeros are near the diagonal, 1966 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1967 or you will get TERRIBLE performance; see the users' manual chapter on 1968 matrices. 1969 1970 Level: intermediate 1971 1972 .keywords: matrix, block, aij, compressed row, sparse, parallel 1973 1974 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1975 @*/ 1976 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1977 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1978 { 1979 Mat B; 1980 Mat_MPIBAIJ *b; 1981 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg; 1982 int flag1 = 0,flag2 = 0; 1983 1984 PetscFunctionBegin; 1985 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1986 1987 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1988 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 1989 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 1990 if (d_nnz) { 1991 for (i=0; i<m/bs; i++) { 1992 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1993 } 1994 } 1995 if (o_nnz) { 1996 for (i=0; i<m/bs; i++) { 1997 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1998 } 1999 } 2000 2001 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2002 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2003 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2004 if (!flag1 && !flag2 && size == 1) { 2005 if (M == PETSC_DECIDE) M = m; 2006 if (N == PETSC_DECIDE) N = n; 2007 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2008 PetscFunctionReturn(0); 2009 } 2010 2011 *A = 0; 2012 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2013 PLogObjectCreate(B); 2014 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2015 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2016 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2017 2018 B->ops->destroy = MatDestroy_MPIBAIJ; 2019 B->ops->view = MatView_MPIBAIJ; 2020 B->mapping = 0; 2021 B->factor = 0; 2022 B->assembled = PETSC_FALSE; 2023 2024 B->insertmode = NOT_SET_VALUES; 2025 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2026 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2027 2028 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2029 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2030 } 2031 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2032 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2033 } 2034 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2035 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2036 } 2037 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2038 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2039 2040 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2041 work[0] = m; work[1] = n; 2042 mbs = m/bs; nbs = n/bs; 2043 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2044 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2045 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2046 } 2047 if (m == PETSC_DECIDE) { 2048 Mbs = M/bs; 2049 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2050 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2051 m = mbs*bs; 2052 } 2053 if (n == PETSC_DECIDE) { 2054 Nbs = N/bs; 2055 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2056 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2057 n = nbs*bs; 2058 } 2059 if (mbs*bs != m || nbs*bs != n) { 2060 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2061 } 2062 2063 b->m = m; B->m = m; 2064 b->n = n; B->n = n; 2065 b->N = N; B->N = N; 2066 b->M = M; B->M = M; 2067 b->bs = bs; 2068 b->bs2 = bs*bs; 2069 b->mbs = mbs; 2070 b->nbs = nbs; 2071 b->Mbs = Mbs; 2072 b->Nbs = Nbs; 2073 2074 /* the information in the maps duplicates the information computed below, eventually 2075 we should remove the duplicate information that is not contained in the maps */ 2076 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2077 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2078 2079 /* build local table of row and column ownerships */ 2080 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2081 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2082 b->cowners = b->rowners + b->size + 2; 2083 b->rowners_bs = b->cowners + b->size + 2; 2084 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2085 b->rowners[0] = 0; 2086 for ( i=2; i<=b->size; i++ ) { 2087 b->rowners[i] += b->rowners[i-1]; 2088 } 2089 for ( i=0; i<=b->size; i++ ) { 2090 b->rowners_bs[i] = b->rowners[i]*bs; 2091 } 2092 b->rstart = b->rowners[b->rank]; 2093 b->rend = b->rowners[b->rank+1]; 2094 b->rstart_bs = b->rstart * bs; 2095 b->rend_bs = b->rend * bs; 2096 2097 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2098 b->cowners[0] = 0; 2099 for ( i=2; i<=b->size; i++ ) { 2100 b->cowners[i] += b->cowners[i-1]; 2101 } 2102 b->cstart = b->cowners[b->rank]; 2103 b->cend = b->cowners[b->rank+1]; 2104 b->cstart_bs = b->cstart * bs; 2105 b->cend_bs = b->cend * bs; 2106 2107 2108 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2109 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2110 PLogObjectParent(B,b->A); 2111 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2112 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2113 PLogObjectParent(B,b->B); 2114 2115 /* build cache for off array entries formed */ 2116 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2117 ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2118 b->donotstash = 0; 2119 b->colmap = 0; 2120 b->garray = 0; 2121 b->roworiented = 1; 2122 2123 /* stuff used in block assembly */ 2124 b->barray = 0; 2125 2126 /* stuff used for matrix vector multiply */ 2127 b->lvec = 0; 2128 b->Mvctx = 0; 2129 2130 /* stuff for MatGetRow() */ 2131 b->rowindices = 0; 2132 b->rowvalues = 0; 2133 b->getrowactive = PETSC_FALSE; 2134 2135 /* hash table stuff */ 2136 b->ht = 0; 2137 b->hd = 0; 2138 b->ht_size = 0; 2139 b->ht_flag = 0; 2140 b->ht_fact = 0; 2141 b->ht_total_ct = 0; 2142 b->ht_insert_ct = 0; 2143 2144 *A = B; 2145 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2146 if (flg) { 2147 double fact = 1.39; 2148 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2149 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr); 2150 if (fact <= 1.0) fact = 1.39; 2151 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2152 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2153 } 2154 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2155 "MatStoreValues_MPIBAIJ", 2156 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2157 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2158 "MatRetrieveValues_MPIBAIJ", 2159 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2160 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2161 "MatGetDiagonalBlock_MPIBAIJ", 2162 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNC__ 2167 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2168 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2169 { 2170 Mat mat; 2171 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2172 int ierr, len=0, flg; 2173 2174 PetscFunctionBegin; 2175 *newmat = 0; 2176 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2177 PLogObjectCreate(mat); 2178 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2179 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2180 mat->ops->destroy = MatDestroy_MPIBAIJ; 2181 mat->ops->view = MatView_MPIBAIJ; 2182 mat->factor = matin->factor; 2183 mat->assembled = PETSC_TRUE; 2184 mat->insertmode = NOT_SET_VALUES; 2185 2186 a->m = mat->m = oldmat->m; 2187 a->n = mat->n = oldmat->n; 2188 a->M = mat->M = oldmat->M; 2189 a->N = mat->N = oldmat->N; 2190 2191 a->bs = oldmat->bs; 2192 a->bs2 = oldmat->bs2; 2193 a->mbs = oldmat->mbs; 2194 a->nbs = oldmat->nbs; 2195 a->Mbs = oldmat->Mbs; 2196 a->Nbs = oldmat->Nbs; 2197 2198 a->rstart = oldmat->rstart; 2199 a->rend = oldmat->rend; 2200 a->cstart = oldmat->cstart; 2201 a->cend = oldmat->cend; 2202 a->size = oldmat->size; 2203 a->rank = oldmat->rank; 2204 a->donotstash = oldmat->donotstash; 2205 a->roworiented = oldmat->roworiented; 2206 a->rowindices = 0; 2207 a->rowvalues = 0; 2208 a->getrowactive = PETSC_FALSE; 2209 a->barray = 0; 2210 a->rstart_bs = oldmat->rstart_bs; 2211 a->rend_bs = oldmat->rend_bs; 2212 a->cstart_bs = oldmat->cstart_bs; 2213 a->cend_bs = oldmat->cend_bs; 2214 2215 /* hash table stuff */ 2216 a->ht = 0; 2217 a->hd = 0; 2218 a->ht_size = 0; 2219 a->ht_flag = oldmat->ht_flag; 2220 a->ht_fact = oldmat->ht_fact; 2221 a->ht_total_ct = 0; 2222 a->ht_insert_ct = 0; 2223 2224 2225 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2226 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2227 a->cowners = a->rowners + a->size + 2; 2228 a->rowners_bs = a->cowners + a->size + 2; 2229 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2230 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2231 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2232 if (oldmat->colmap) { 2233 #if defined (PETSC_USE_CTABLE) 2234 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2235 #else 2236 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2237 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2238 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2239 #endif 2240 } else a->colmap = 0; 2241 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2242 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2243 PLogObjectMemory(mat,len*sizeof(int)); 2244 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2245 } else a->garray = 0; 2246 2247 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2248 PLogObjectParent(mat,a->lvec); 2249 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2250 2251 PLogObjectParent(mat,a->Mvctx); 2252 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2253 PLogObjectParent(mat,a->A); 2254 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2255 PLogObjectParent(mat,a->B); 2256 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2257 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2258 if (flg) { 2259 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2260 } 2261 *newmat = mat; 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #include "sys.h" 2266 2267 #undef __FUNC__ 2268 #define __FUNC__ "MatLoad_MPIBAIJ" 2269 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2270 { 2271 Mat A; 2272 int i, nz, ierr, j,rstart, rend, fd; 2273 Scalar *vals,*buf; 2274 MPI_Comm comm = ((PetscObject)viewer)->comm; 2275 MPI_Status status; 2276 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2277 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2278 int flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2279 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2280 int dcount,kmax,k,nzcount,tmp; 2281 2282 PetscFunctionBegin; 2283 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2284 2285 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2286 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2287 if (!rank) { 2288 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2289 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2290 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2291 if (header[3] < 0) { 2292 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2293 } 2294 } 2295 2296 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2297 M = header[1]; N = header[2]; 2298 2299 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2300 2301 /* 2302 This code adds extra rows to make sure the number of rows is 2303 divisible by the blocksize 2304 */ 2305 Mbs = M/bs; 2306 extra_rows = bs - M + bs*(Mbs); 2307 if (extra_rows == bs) extra_rows = 0; 2308 else Mbs++; 2309 if (extra_rows &&!rank) { 2310 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2311 } 2312 2313 /* determine ownership of all rows */ 2314 mbs = Mbs/size + ((Mbs % size) > rank); 2315 m = mbs * bs; 2316 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2317 browners = rowners + size + 1; 2318 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2319 rowners[0] = 0; 2320 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2321 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2322 rstart = rowners[rank]; 2323 rend = rowners[rank+1]; 2324 2325 /* distribute row lengths to all processors */ 2326 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2327 if (!rank) { 2328 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths); 2329 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2330 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2331 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2332 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2333 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2334 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2335 } else { 2336 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2337 } 2338 2339 if (!rank) { 2340 /* calculate the number of nonzeros on each processor */ 2341 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2342 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2343 for ( i=0; i<size; i++ ) { 2344 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2345 procsnz[i] += rowlengths[j]; 2346 } 2347 } 2348 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2349 2350 /* determine max buffer needed and allocate it */ 2351 maxnz = 0; 2352 for ( i=0; i<size; i++ ) { 2353 maxnz = PetscMax(maxnz,procsnz[i]); 2354 } 2355 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2356 2357 /* read in my part of the matrix column indices */ 2358 nz = procsnz[0]; 2359 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2360 mycols = ibuf; 2361 if (size == 1) nz -= extra_rows; 2362 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2363 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2364 2365 /* read in every ones (except the last) and ship off */ 2366 for ( i=1; i<size-1; i++ ) { 2367 nz = procsnz[i]; 2368 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2369 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2370 } 2371 /* read in the stuff for the last proc */ 2372 if ( size != 1 ) { 2373 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2374 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2375 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2376 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2377 } 2378 ierr = PetscFree(cols);CHKERRQ(ierr); 2379 } else { 2380 /* determine buffer space needed for message */ 2381 nz = 0; 2382 for ( i=0; i<m; i++ ) { 2383 nz += locrowlens[i]; 2384 } 2385 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2386 mycols = ibuf; 2387 /* receive message of column indices*/ 2388 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2389 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2390 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2391 } 2392 2393 /* loop over local rows, determining number of off diagonal entries */ 2394 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens); 2395 odlens = dlens + (rend-rstart); 2396 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2397 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2398 masked1 = mask + Mbs; 2399 masked2 = masked1 + Mbs; 2400 rowcount = 0; nzcount = 0; 2401 for ( i=0; i<mbs; i++ ) { 2402 dcount = 0; 2403 odcount = 0; 2404 for ( j=0; j<bs; j++ ) { 2405 kmax = locrowlens[rowcount]; 2406 for ( k=0; k<kmax; k++ ) { 2407 tmp = mycols[nzcount++]/bs; 2408 if (!mask[tmp]) { 2409 mask[tmp] = 1; 2410 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2411 else masked1[dcount++] = tmp; 2412 } 2413 } 2414 rowcount++; 2415 } 2416 2417 dlens[i] = dcount; 2418 odlens[i] = odcount; 2419 2420 /* zero out the mask elements we set */ 2421 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2422 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2423 } 2424 2425 /* create our matrix */ 2426 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2427 A = *newmat; 2428 MatSetOption(A,MAT_COLUMNS_SORTED); 2429 2430 if (!rank) { 2431 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2432 /* read in my part of the matrix numerical values */ 2433 nz = procsnz[0]; 2434 vals = buf; 2435 mycols = ibuf; 2436 if (size == 1) nz -= extra_rows; 2437 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2438 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2439 2440 /* insert into matrix */ 2441 jj = rstart*bs; 2442 for ( i=0; i<m; i++ ) { 2443 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2444 mycols += locrowlens[i]; 2445 vals += locrowlens[i]; 2446 jj++; 2447 } 2448 /* read in other processors (except the last one) and ship out */ 2449 for ( i=1; i<size-1; i++ ) { 2450 nz = procsnz[i]; 2451 vals = buf; 2452 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2453 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2454 } 2455 /* the last proc */ 2456 if ( size != 1 ){ 2457 nz = procsnz[i] - extra_rows; 2458 vals = buf; 2459 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2460 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2461 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2462 } 2463 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2464 } else { 2465 /* receive numeric values */ 2466 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2467 2468 /* receive message of values*/ 2469 vals = buf; 2470 mycols = ibuf; 2471 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2472 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2473 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2474 2475 /* insert into matrix */ 2476 jj = rstart*bs; 2477 for ( i=0; i<m; i++ ) { 2478 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2479 mycols += locrowlens[i]; 2480 vals += locrowlens[i]; 2481 jj++; 2482 } 2483 } 2484 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2485 ierr = PetscFree(buf);CHKERRQ(ierr); 2486 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2487 ierr = PetscFree(rowners);CHKERRQ(ierr); 2488 ierr = PetscFree(dlens);CHKERRQ(ierr); 2489 ierr = PetscFree(mask);CHKERRQ(ierr); 2490 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2491 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #undef __FUNC__ 2496 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2497 /*@ 2498 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2499 2500 Input Parameters: 2501 . mat - the matrix 2502 . fact - factor 2503 2504 Collective on Mat 2505 2506 Level: advanced 2507 2508 Notes: 2509 This can also be set by the command line option: -mat_use_hash_table fact 2510 2511 .keywords: matrix, hashtable, factor, HT 2512 2513 .seealso: MatSetOption() 2514 @*/ 2515 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2516 { 2517 Mat_MPIBAIJ *baij; 2518 2519 PetscFunctionBegin; 2520 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2521 if (mat->type != MATMPIBAIJ) { 2522 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2523 } 2524 baij = (Mat_MPIBAIJ*) mat->data; 2525 baij->ht_fact = fact; 2526 PetscFunctionReturn(0); 2527 } 2528