/*$Id: mpibaij.c,v 1.183 1999/11/05 14:45:37 bsmith Exp bsmith $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
#include "src/vec/vecimpl.h"

/* Routines implemented elsewhere in the BAIJ code but used here */
extern int MatSetUpMultiply_MPIBAIJ(Mat);
extern int DisAssemble_MPIBAIJ(Mat);
extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqBAIJ(Mat);

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
/*
   MatStoreValues_MPIBAIJ - Saves a copy of the current matrix values by
   delegating MatStoreValues() to both local blocks: the diagonal part (A)
   and the off-diagonal part (B).
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
/*
   MatRetrieveValues_MPIBAIJ - Restores the matrix values previously saved
   with MatStoreValues_MPIBAIJ(), again delegating to the two local blocks.
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* Hash-table variant: stores only the nbs off-diagonal block columns.
     Keys and values are shifted by +1 so that 0 can mean "not present".
     NOTE(review): table is sized from baij->nbs/5 -- confirm this field
     matches B->nbs (the off-diagonal block-column count). */
  ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant (non scalable: one entry per global block column):
     colmap[gcol] = local_block_col*bs + 1, or 0 when the column is absent */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/* number of extra block slots added to a row when it overflows */
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private - Inline insertion of the scalar entry
   (row,col) (local scalar indices) with the given value into the diagonal
   block a; grows the row storage by CHUNKSIZE blocks when the row is full.
   All working variables (rp,ap,aa,ai,aj,aimax,ailen,bs,bs2,...) must be
   declared and initialized by the caller (see MatSetValues_MPIBAIJ).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    /* binary search narrows to <= 3 candidates, then linear scan */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len = \
new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

/*
   MatSetValues_SeqBAIJ_B_Private - Same insertion logic as the A variant
   above, but operating on the off-diagonal block b (arrays bi,bj,ba,...).
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    /* binary search narrows to <= 3 candidates, then linear scan */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<high; _i++ ) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j = (int *) (new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts or adds scalar-indexed values.  Locally
   owned entries go directly into the diagonal (A) or off-diagonal (B)
   local block via the private macros above; rows owned by other processes
   are stashed for communication during assembly.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;   /* negative row index means "skip" */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {   /* locally owned row */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){   /* diagonal (A) block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {   /* off-diagonal (B) block: translate global column to local */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not present in B yet: revert to disassembled form so
                 the nonzero structure can grow */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {   /* off-processor row: stash for assembly-time communication */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked_MPIBAIJ - Inserts or adds values one bs x bs block at
   a time; im[]/in[] are block (not scalar) indices.  Off-processor blocks
   are placed in the block stash (mat->bstash).
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int
*im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* barray is a persistent bs*bs scratch block, allocated on first use */
  if(!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  /* stride between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){   /* diagonal (A) block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {   /* off-diagonal (B) block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            /* sanity check: colmap values must be block-aligned */
            { int data;
              ierr =
PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not present yet: revert to disassembled form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {   /* off-processor block row: stash in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* multiplicative (golden-ratio) hash of key into [0,size); tmp is a
   caller-supplied double scratch variable */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues: the entry
   address is looked up in the table built by
   MatCreateHashTable_MPIBAIJ_Private(); creating new nonzeros is an error.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0)
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probe from h1 to the end of the table, then wrap */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of
   MatSetValuesBlocked; writes whole bs x bs blocks through the table
   (no new nonzero blocks may be created).
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int
ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      **HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        /* linear probe from h1 to the end of the table, then wrap */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              /* jj steps through one column of the stored block */
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs;
ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {   /* off-processor block row: stash in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves values for locally owned (row,col)
   pairs (scalar indices); off-processor rows are not supported.
   Off-diagonal columns are translated through colmap/garray; entries not
   stored locally come back as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){   /* diagonal (A) block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr =
CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent block column (or stale colmap slot) reads as zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Computes a matrix norm.  In parallel only
   NORM_FROBENIUS is supported: local sums of squares over the A and B
   blocks are reduced across the communicator and the square root taken.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int         ierr, i,bs2=baij->bs2;
  double      sum = 0.0;
  Scalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  /* table size = factor * (local nonzero blocks); factor > 1 keeps the
     load factor below one so the linear probes terminate */
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table: HD (value pointers) and HT (keys)
     live in one contiguous allocation */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;


  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);


  /* Loop Over A: key encodes the global (block row, block col) pair */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: off-diagonal block columns map back through garray */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g) 781 if (k> max) max = k; 782 #endif 783 } 784 } 785 786 /* Print Summary */ 787 #if defined(PETSC_USE_BOPT_g) 788 for ( i=0,j=0; i<size; i++) { 789 if (HT[i]) {j++;} 790 } 791 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((double)(ct+j))/j,max); 792 #endif 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNC__ 797 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 798 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 799 { 800 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 801 int ierr,nstash,reallocs; 802 InsertMode addv; 803 804 PetscFunctionBegin; 805 if (baij->donotstash) { 806 PetscFunctionReturn(0); 807 } 808 809 /* make sure all processors are either in INSERTMODE or ADDMODE */ 810 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 811 if (addv == (ADD_VALUES|INSERT_VALUES)) { 812 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 813 } 814 mat->insertmode = addv; /* in case this processor had no cache */ 815 816 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 817 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 818 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 819 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNC__ 826 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 827 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 828 { 829 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data; 830 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 831 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 832 int 
*row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the scalar stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on final assembly, optionally build the hash table and switch the
     MatSetValues ops to the faster _HT variants (structure is then fixed) */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

/*
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
*/

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij =
(Mat_MPIBAIJ *) mat->data; 944 int ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank; 945 PetscTruth isascii,isdraw; 946 947 PetscFunctionBegin; 948 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 949 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 950 if (isascii) { 951 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 952 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 953 MatInfo info; 954 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 955 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 956 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 957 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 958 baij->bs,(int)info.memory);CHKERRQ(ierr); 959 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 960 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 961 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 962 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 963 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 964 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 967 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 } 971 972 if (isdraw) { 973 Draw draw; 974 PetscTruth isnull; 975 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 976 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 977 } 978 979 if (size == 1) { 980 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 981 } else { 982 /* assemble the entire matrix onto first processor. 
*/ 983 Mat A; 984 Mat_SeqBAIJ *Aloc; 985 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 986 int mbs = baij->mbs; 987 Scalar *a; 988 989 if (!rank) { 990 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 991 } else { 992 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 993 } 994 PLogObjectParent(mat,A); 995 996 /* copy over the A part */ 997 Aloc = (Mat_SeqBAIJ*) baij->A->data; 998 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 999 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1000 1001 for ( i=0; i<mbs; i++ ) { 1002 rvals[0] = bs*(baij->rstart + i); 1003 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1004 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1005 col = (baij->cstart+aj[j])*bs; 1006 for (k=0; k<bs; k++ ) { 1007 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1008 col++; a += bs; 1009 } 1010 } 1011 } 1012 /* copy over the B part */ 1013 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1014 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1015 for ( i=0; i<mbs; i++ ) { 1016 rvals[0] = bs*(baij->rstart + i); 1017 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1018 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1019 col = baij->garray[aj[j]]*bs; 1020 for (k=0; k<bs; k++ ) { 1021 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1022 col++; a += bs; 1023 } 1024 } 1025 } 1026 ierr = PetscFree(rvals);CHKERRQ(ierr); 1027 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1028 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1029 /* 1030 Everyone has to call to draw the matrix since the graphics waits are 1031 synchronized across all processors that share the Draw object 1032 */ 1033 if (!rank) { 1034 Viewer sviewer; 1035 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1036 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1037 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1038 } 1039 
ierr = MatDestroy(A);CHKERRQ(ierr); 1040 } 1041 PetscFunctionReturn(0); 1042 } 1043 1044 1045 1046 #undef __FUNC__ 1047 #define __FUNC__ "MatView_MPIBAIJ" 1048 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1049 { 1050 int ierr; 1051 PetscTruth isascii,isdraw,issocket,isbinary; 1052 1053 PetscFunctionBegin; 1054 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1055 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1056 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1057 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1058 if (isascii || isdraw || issocket || isbinary) { 1059 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1060 } else { 1061 SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNC__ 1067 #define __FUNC__ "MatDestroy_MPIBAIJ" 1068 int MatDestroy_MPIBAIJ(Mat mat) 1069 { 1070 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1071 int ierr; 1072 1073 PetscFunctionBegin; 1074 1075 if (mat->mapping) { 1076 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1077 } 1078 if (mat->bmapping) { 1079 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1080 } 1081 if (mat->rmap) { 1082 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1083 } 1084 if (mat->cmap) { 1085 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1086 } 1087 #if defined(PETSC_USE_LOG) 1088 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1089 #endif 1090 1091 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1092 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1093 1094 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1095 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1096 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1097 #if defined (PETSC_USE_CTABLE) 1098 if (baij->colmap) {ierr = 
PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1099 #else 1100 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1101 #endif 1102 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1103 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1104 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1105 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1106 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1107 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1108 ierr = PetscFree(baij);CHKERRQ(ierr); 1109 PLogObjectDestroy(mat); 1110 PetscHeaderDestroy(mat); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNC__ 1115 #define __FUNC__ "MatMult_MPIBAIJ" 1116 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1117 { 1118 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1119 int ierr, nt; 1120 1121 PetscFunctionBegin; 1122 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1123 if (nt != a->n) { 1124 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1125 } 1126 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1127 if (nt != a->m) { 1128 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1129 } 1130 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1131 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1132 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1133 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1134 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNC__ 1139 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1140 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1141 { 1142 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1143 int ierr; 1144 1145 PetscFunctionBegin; 1146 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1147 
ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1148 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1149 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNC__ 1154 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1155 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1156 { 1157 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1158 int ierr; 1159 1160 PetscFunctionBegin; 1161 /* do nondiagonal part */ 1162 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1163 /* send it on its way */ 1164 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1165 /* do local part */ 1166 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 1167 /* receive remote parts: note this assumes the values are not actually */ 1168 /* inserted in yy until the next line, which is true for my implementation*/ 1169 /* but is not perhaps always true. */ 1170 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNC__ 1175 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1176 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1177 { 1178 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1179 int ierr; 1180 1181 PetscFunctionBegin; 1182 /* do nondiagonal part */ 1183 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 1184 /* send it on its way */ 1185 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1186 /* do local part */ 1187 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1188 /* receive remote parts: note this assumes the values are not actually */ 1189 /* inserted in yy until the next line, which is true for my implementation*/ 1190 /* but is not perhaps always true. 
*/
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
/* MatScale_MPIBAIJ - Scales the matrix by *aa; both sequential submatrices are scaled. */
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
/* MatGetSize_MPIBAIJ - Returns the global dimensions (in points, not blocks). */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
/* MatGetLocalSize_MPIBAIJ - Returns this process's local dimensions (in points). */
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
/* MatGetOwnershipRange_MPIBAIJ - Returns [rstart,rend) in point rows; rstart/rend are stored in block rows, hence *bs. */
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned (point) row with global column
   indices, merging the rows of the diagonal (A) and off-diagonal (B) submatrices
   so the returned columns appear in increasing global order.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both the Scalar values and the int indices; indices start after the values */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v) {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column precedes this process's diagonal block */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be computed here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v) *v = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIBAIJ"
/* MatRestoreRow_MPIBAIJ - Companion of MatGetRow; only clears the active flag (scratch space is reused). */
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIBAIJ"
/* MatZeroEntries_MPIBAIJ - Zeros all entries, keeping the nonzero structure. */
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIBAIJ"
/* MatGetInfo_MPIBAIJ - Sums the statistics of the two sequential submatrices; for global flags reduces over the communicator. */
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a =
(Mat_MPIBAIJ *) matin->data; 1383 Mat A = a->A, B = a->B; 1384 int ierr; 1385 double isend[5], irecv[5]; 1386 1387 PetscFunctionBegin; 1388 info->block_size = (double)a->bs; 1389 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1390 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1391 isend[3] = info->memory; isend[4] = info->mallocs; 1392 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1393 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1394 isend[3] += info->memory; isend[4] += info->mallocs; 1395 if (flag == MAT_LOCAL) { 1396 info->nz_used = isend[0]; 1397 info->nz_allocated = isend[1]; 1398 info->nz_unneeded = isend[2]; 1399 info->memory = isend[3]; 1400 info->mallocs = isend[4]; 1401 } else if (flag == MAT_GLOBAL_MAX) { 1402 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1403 info->nz_used = irecv[0]; 1404 info->nz_allocated = irecv[1]; 1405 info->nz_unneeded = irecv[2]; 1406 info->memory = irecv[3]; 1407 info->mallocs = irecv[4]; 1408 } else if (flag == MAT_GLOBAL_SUM) { 1409 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1410 info->nz_used = irecv[0]; 1411 info->nz_allocated = irecv[1]; 1412 info->nz_unneeded = irecv[2]; 1413 info->memory = irecv[3]; 1414 info->mallocs = irecv[4]; 1415 } else { 1416 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1417 } 1418 info->rows_global = (double)a->M; 1419 info->columns_global = (double)a->N; 1420 info->rows_local = (double)a->m; 1421 info->columns_local = (double)a->N; 1422 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1423 info->fill_ratio_needed = 0; 1424 info->factor_mallocs = 0; 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNC__ 1429 #define __FUNC__ "MatSetOption_MPIBAIJ" 1430 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1431 { 1432 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1433 int ierr; 1434 1435 PetscFunctionBegin; 1436 if 
(op == MAT_NO_NEW_NONZERO_LOCATIONS || 1437 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1438 op == MAT_COLUMNS_UNSORTED || 1439 op == MAT_COLUMNS_SORTED || 1440 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1441 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1442 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1443 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1444 } else if (op == MAT_ROW_ORIENTED) { 1445 a->roworiented = 1; 1446 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1447 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1448 } else if (op == MAT_ROWS_SORTED || 1449 op == MAT_ROWS_UNSORTED || 1450 op == MAT_SYMMETRIC || 1451 op == MAT_STRUCTURALLY_SYMMETRIC || 1452 op == MAT_YES_NEW_DIAGONALS || 1453 op == MAT_USE_HASH_TABLE) { 1454 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1455 } else if (op == MAT_COLUMN_ORIENTED) { 1456 a->roworiented = 0; 1457 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1458 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1459 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1460 a->donotstash = 1; 1461 } else if (op == MAT_NO_NEW_DIAGONALS) { 1462 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1463 } else if (op == MAT_USE_HASH_TABLE) { 1464 a->ht_flag = 1; 1465 } else { 1466 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNC__ 1472 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1473 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1474 { 1475 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1476 Mat_SeqBAIJ *Aloc; 1477 Mat B; 1478 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1479 int bs=baij->bs,mbs=baij->mbs; 1480 Scalar *a; 1481 1482 PetscFunctionBegin; 1483 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1484 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1485 CHKERRQ(ierr); 1486 1487 /* copy over the A part */ 1488 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1489 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1490 
  rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

  for ( i=0; i<mbs; i++ ) {
    /* rvals = the bs global (point) row indices of block row i; inserted as columns of B */
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part; garray maps local block columns to global block columns */
  Aloc = (Mat_SeqBAIJ*) baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for ( i=0; i<mbs; i++ ) {
    rvals[0] = bs*(baij->rstart + i);
    for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++ ) {
        ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
    ierr = MatDestroy(baij->A);CHKERRQ(ierr);
    ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
    if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
    if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
    if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
    if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(baij);CHKERRQ(ierr);

    /*
        This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from C.
    */
    /* copy B's header into A wholesale, then restore A's own function tables */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll) * mat * diag(rr).
   The scatter of rr into baij->lvec (needed to right-scale the off-diagonal
   block) is overlapped with the left scaling and the diagonal-block scaling.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         a = baij->A, b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (point) rows listed in is, optionally placing
   *diag on the diagonal.  Rows may be specified on any process; they are first
   routed to their owning process with point-to-point messages.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag =
A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  /* nprocs[j] = #rows destined for rank j; procs[j] = 1 iff any row goes to rank j */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] holds block-row ownership ranges, so compare against owners[j]*bs */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax = work[rank];
  nrecvs = work[size+rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* starts[] was advanced by the packing loop above; rebuild it for the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first locally-owned global point row */
  base = owners[rank]*bs;

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex] = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits); CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: diagonal entries must be inserted one by one */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* MatPrintHelp_MPIBAIJ - Prints the runtime options for this matrix type (once per run, from rank 0). */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* MatSetUnfactored_MPIBAIJ - Resets the factored state of the diagonal submatrix. */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIBAIJ"
/*
   MatEqual_MPIBAIJ - Compares two parallel BAIJ matrices; the local results
   are combined with a logical-AND reduction so all ranks get the same answer.
*/
int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
  Mat         a, b, c, d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  /* NOTE(review): reduces a PetscTruth with MPI_INT — assumes PetscTruth is int-sized */
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* function table for the MPIBAIJ type; zero slots are unimplemented operations */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};


EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/* MatGetDiagonalBlock_MPIBAIJ - Returns (without copying) the sequential diagonal submatrix. */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.
n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1928 1929 Storage Information: 1930 For a square global matrix we define each processor's diagonal portion 1931 to be its local rows and the corresponding columns (a square submatrix); 1932 each processor's off-diagonal portion encompasses the remainder of the 1933 local matrix (a rectangular submatrix). 1934 1935 The user can specify preallocated storage for the diagonal part of 1936 the local submatrix with either d_nz or d_nnz (not both). Set 1937 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1938 memory allocation. Likewise, specify preallocated storage for the 1939 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1940 1941 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1942 the figure below we depict these three local rows and all columns (0-11). 1943 1944 .vb 1945 0 1 2 3 4 5 6 7 8 9 10 11 1946 ------------------- 1947 row 3 | o o o d d d o o o o o o 1948 row 4 | o o o d d d o o o o o o 1949 row 5 | o o o d d d o o o o o o 1950 ------------------- 1951 .ve 1952 1953 Thus, any entries in the d locations are stored in the d (diagonal) 1954 submatrix, and any entries in the o locations are stored in the 1955 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1956 stored simply in the MATSEQBAIJ format for compressed row storage. 1957 1958 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1959 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1960 In general, for PDE problems in which most nonzeros are near the diagonal, 1961 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1962 or you will get TERRIBLE performance; see the users' manual chapter on 1963 matrices. 
1964 1965 Level: intermediate 1966 1967 .keywords: matrix, block, aij, compressed row, sparse, parallel 1968 1969 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ() 1970 @*/ 1971 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 1972 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1973 { 1974 Mat B; 1975 Mat_MPIBAIJ *b; 1976 int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 1977 PetscTruth flag1,flag2,flg; 1978 1979 PetscFunctionBegin; 1980 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1981 1982 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1983 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 1984 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 1985 if (d_nnz) { 1986 for (i=0; i<m/bs; i++) { 1987 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1988 } 1989 } 1990 if (o_nnz) { 1991 for (i=0; i<m/bs; i++) { 1992 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1993 } 1994 } 1995 1996 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1997 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 1998 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1999 if (!flag1 && !flag2 && size == 1) { 2000 if (M == PETSC_DECIDE) M = m; 2001 if (N == PETSC_DECIDE) N = n; 2002 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2003 PetscFunctionReturn(0); 2004 } 2005 2006 *A = 0; 2007 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2008 PLogObjectCreate(B); 2009 B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2010 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2011 ierr = 
PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2012 2013 B->ops->destroy = MatDestroy_MPIBAIJ; 2014 B->ops->view = MatView_MPIBAIJ; 2015 B->mapping = 0; 2016 B->factor = 0; 2017 B->assembled = PETSC_FALSE; 2018 2019 B->insertmode = NOT_SET_VALUES; 2020 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2021 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2022 2023 if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 2024 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2025 } 2026 if ( M == PETSC_DECIDE && m == PETSC_DECIDE) { 2027 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2028 } 2029 if ( N == PETSC_DECIDE && n == PETSC_DECIDE) { 2030 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2031 } 2032 if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2033 if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2034 2035 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2036 work[0] = m; work[1] = n; 2037 mbs = m/bs; nbs = n/bs; 2038 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 2039 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2040 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2041 } 2042 if (m == PETSC_DECIDE) { 2043 Mbs = M/bs; 2044 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2045 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2046 m = mbs*bs; 2047 } 2048 if (n == PETSC_DECIDE) { 2049 Nbs = N/bs; 2050 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2051 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2052 n = nbs*bs; 2053 } 2054 if (mbs*bs != m || nbs*bs != n) { 2055 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2056 } 2057 2058 b->m = m; B->m = m; 2059 b->n = n; B->n = n; 2060 b->N = N; B->N = N; 2061 b->M = M; B->M = M; 2062 
b->bs = bs; 2063 b->bs2 = bs*bs; 2064 b->mbs = mbs; 2065 b->nbs = nbs; 2066 b->Mbs = Mbs; 2067 b->Nbs = Nbs; 2068 2069 /* the information in the maps duplicates the information computed below, eventually 2070 we should remove the duplicate information that is not contained in the maps */ 2071 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 2072 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 2073 2074 /* build local table of row and column ownerships */ 2075 b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2076 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2077 b->cowners = b->rowners + b->size + 2; 2078 b->rowners_bs = b->cowners + b->size + 2; 2079 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2080 b->rowners[0] = 0; 2081 for ( i=2; i<=b->size; i++ ) { 2082 b->rowners[i] += b->rowners[i-1]; 2083 } 2084 for ( i=0; i<=b->size; i++ ) { 2085 b->rowners_bs[i] = b->rowners[i]*bs; 2086 } 2087 b->rstart = b->rowners[b->rank]; 2088 b->rend = b->rowners[b->rank+1]; 2089 b->rstart_bs = b->rstart * bs; 2090 b->rend_bs = b->rend * bs; 2091 2092 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2093 b->cowners[0] = 0; 2094 for ( i=2; i<=b->size; i++ ) { 2095 b->cowners[i] += b->cowners[i-1]; 2096 } 2097 b->cstart = b->cowners[b->rank]; 2098 b->cend = b->cowners[b->rank+1]; 2099 b->cstart_bs = b->cstart * bs; 2100 b->cend_bs = b->cend * bs; 2101 2102 2103 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2104 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2105 PLogObjectParent(B,b->A); 2106 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2107 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2108 PLogObjectParent(B,b->B); 2109 2110 /* build cache for off array entries formed */ 2111 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 2112 ierr = 
MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr); 2113 b->donotstash = 0; 2114 b->colmap = 0; 2115 b->garray = 0; 2116 b->roworiented = 1; 2117 2118 /* stuff used in block assembly */ 2119 b->barray = 0; 2120 2121 /* stuff used for matrix vector multiply */ 2122 b->lvec = 0; 2123 b->Mvctx = 0; 2124 2125 /* stuff for MatGetRow() */ 2126 b->rowindices = 0; 2127 b->rowvalues = 0; 2128 b->getrowactive = PETSC_FALSE; 2129 2130 /* hash table stuff */ 2131 b->ht = 0; 2132 b->hd = 0; 2133 b->ht_size = 0; 2134 b->ht_flag = 0; 2135 b->ht_fact = 0; 2136 b->ht_total_ct = 0; 2137 b->ht_insert_ct = 0; 2138 2139 *A = B; 2140 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2141 if (flg) { 2142 double fact = 1.39; 2143 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2144 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2145 if (fact <= 1.0) fact = 1.39; 2146 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2147 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2148 } 2149 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2150 "MatStoreValues_MPIBAIJ", 2151 (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2152 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2153 "MatRetrieveValues_MPIBAIJ", 2154 (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2155 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2156 "MatGetDiagonalBlock_MPIBAIJ", 2157 (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 #undef __FUNC__ 2162 #define __FUNC__ "MatDuplicate_MPIBAIJ" 2163 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2164 { 2165 Mat mat; 2166 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data; 2167 int ierr, len=0; 2168 PetscTruth flg; 2169 2170 PetscFunctionBegin; 2171 *newmat = 0; 2172 PetscHeaderCreate(mat,_p_Mat,struct 
_MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2173 PLogObjectCreate(mat); 2174 mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2175 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2176 mat->ops->destroy = MatDestroy_MPIBAIJ; 2177 mat->ops->view = MatView_MPIBAIJ; 2178 mat->factor = matin->factor; 2179 mat->assembled = PETSC_TRUE; 2180 mat->insertmode = NOT_SET_VALUES; 2181 2182 a->m = mat->m = oldmat->m; 2183 a->n = mat->n = oldmat->n; 2184 a->M = mat->M = oldmat->M; 2185 a->N = mat->N = oldmat->N; 2186 2187 a->bs = oldmat->bs; 2188 a->bs2 = oldmat->bs2; 2189 a->mbs = oldmat->mbs; 2190 a->nbs = oldmat->nbs; 2191 a->Mbs = oldmat->Mbs; 2192 a->Nbs = oldmat->Nbs; 2193 2194 a->rstart = oldmat->rstart; 2195 a->rend = oldmat->rend; 2196 a->cstart = oldmat->cstart; 2197 a->cend = oldmat->cend; 2198 a->size = oldmat->size; 2199 a->rank = oldmat->rank; 2200 a->donotstash = oldmat->donotstash; 2201 a->roworiented = oldmat->roworiented; 2202 a->rowindices = 0; 2203 a->rowvalues = 0; 2204 a->getrowactive = PETSC_FALSE; 2205 a->barray = 0; 2206 a->rstart_bs = oldmat->rstart_bs; 2207 a->rend_bs = oldmat->rend_bs; 2208 a->cstart_bs = oldmat->cstart_bs; 2209 a->cend_bs = oldmat->cend_bs; 2210 2211 /* hash table stuff */ 2212 a->ht = 0; 2213 a->hd = 0; 2214 a->ht_size = 0; 2215 a->ht_flag = oldmat->ht_flag; 2216 a->ht_fact = oldmat->ht_fact; 2217 a->ht_total_ct = 0; 2218 a->ht_insert_ct = 0; 2219 2220 2221 a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2222 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2223 a->cowners = a->rowners + a->size + 2; 2224 a->rowners_bs = a->cowners + a->size + 2; 2225 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2226 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2227 ierr = 
MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2228 if (oldmat->colmap) { 2229 #if defined (PETSC_USE_CTABLE) 2230 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2231 #else 2232 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2233 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2234 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2235 #endif 2236 } else a->colmap = 0; 2237 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 2238 a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2239 PLogObjectMemory(mat,len*sizeof(int)); 2240 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2241 } else a->garray = 0; 2242 2243 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2244 PLogObjectParent(mat,a->lvec); 2245 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2246 2247 PLogObjectParent(mat,a->Mvctx); 2248 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2249 PLogObjectParent(mat,a->A); 2250 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2251 PLogObjectParent(mat,a->B); 2252 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2253 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2254 if (flg) { 2255 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2256 } 2257 *newmat = mat; 2258 PetscFunctionReturn(0); 2259 } 2260 2261 #include "sys.h" 2262 2263 #undef __FUNC__ 2264 #define __FUNC__ "MatLoad_MPIBAIJ" 2265 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2266 { 2267 Mat A; 2268 int i, nz, ierr, j,rstart, rend, fd; 2269 Scalar *vals,*buf; 2270 MPI_Comm comm = ((PetscObject)viewer)->comm; 2271 MPI_Status status; 2272 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2273 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 2274 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2275 int 
*dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2276 int dcount,kmax,k,nzcount,tmp; 2277 2278 PetscFunctionBegin; 2279 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2280 2281 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2282 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2283 if (!rank) { 2284 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2285 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2286 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2287 if (header[3] < 0) { 2288 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2289 } 2290 } 2291 2292 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2293 M = header[1]; N = header[2]; 2294 2295 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2296 2297 /* 2298 This code adds extra rows to make sure the number of rows is 2299 divisible by the blocksize 2300 */ 2301 Mbs = M/bs; 2302 extra_rows = bs - M + bs*(Mbs); 2303 if (extra_rows == bs) extra_rows = 0; 2304 else Mbs++; 2305 if (extra_rows &&!rank) { 2306 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2307 } 2308 2309 /* determine ownership of all rows */ 2310 mbs = Mbs/size + ((Mbs % size) > rank); 2311 m = mbs * bs; 2312 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2313 browners = rowners + size + 1; 2314 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2315 rowners[0] = 0; 2316 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2317 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2318 rstart = rowners[rank]; 2319 rend = rowners[rank+1]; 2320 2321 /* distribute row lengths to all processors */ 2322 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens); 2323 if (!rank) { 2324 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) 
);CHKPTRQ(rowlengths); 2325 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2326 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2327 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2328 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2329 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2330 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2331 } else { 2332 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2333 } 2334 2335 if (!rank) { 2336 /* calculate the number of nonzeros on each processor */ 2337 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2338 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2339 for ( i=0; i<size; i++ ) { 2340 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2341 procsnz[i] += rowlengths[j]; 2342 } 2343 } 2344 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2345 2346 /* determine max buffer needed and allocate it */ 2347 maxnz = 0; 2348 for ( i=0; i<size; i++ ) { 2349 maxnz = PetscMax(maxnz,procsnz[i]); 2350 } 2351 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2352 2353 /* read in my part of the matrix column indices */ 2354 nz = procsnz[0]; 2355 ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2356 mycols = ibuf; 2357 if (size == 1) nz -= extra_rows; 2358 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2359 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2360 2361 /* read in every ones (except the last) and ship off */ 2362 for ( i=1; i<size-1; i++ ) { 2363 nz = procsnz[i]; 2364 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2365 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2366 } 2367 /* read in the stuff for the last proc */ 2368 if ( size != 1 ) { 2369 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2370 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2371 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2372 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2373 } 2374 ierr = PetscFree(cols);CHKERRQ(ierr); 2375 } else { 2376 /* determine buffer space needed for message */ 2377 nz = 0; 2378 for ( i=0; i<m; i++ ) { 2379 nz += locrowlens[i]; 2380 } 2381 ibuf = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf); 2382 mycols = ibuf; 2383 /* receive message of column indices*/ 2384 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2385 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2386 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2387 } 2388 2389 /* loop over local rows, determining number of off diagonal entries */ 2390 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens); 2391 odlens = dlens + (rend-rstart); 2392 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask); 2393 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2394 masked1 = mask + Mbs; 2395 masked2 = masked1 + Mbs; 2396 rowcount = 0; nzcount = 0; 2397 for ( i=0; i<mbs; i++ ) { 2398 dcount = 0; 2399 odcount = 0; 2400 for ( j=0; j<bs; j++ ) { 2401 kmax = locrowlens[rowcount]; 2402 for ( k=0; k<kmax; k++ ) { 2403 tmp = mycols[nzcount++]/bs; 2404 if (!mask[tmp]) { 2405 mask[tmp] = 1; 2406 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2407 else masked1[dcount++] = tmp; 2408 } 2409 } 2410 rowcount++; 2411 } 2412 2413 dlens[i] = dcount; 2414 odlens[i] = odcount; 2415 2416 /* zero out the mask elements we set */ 2417 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2418 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2419 } 2420 2421 /* create our matrix */ 2422 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2423 A = *newmat; 2424 MatSetOption(A,MAT_COLUMNS_SORTED); 2425 2426 if (!rank) { 
2427 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf); 2428 /* read in my part of the matrix numerical values */ 2429 nz = procsnz[0]; 2430 vals = buf; 2431 mycols = ibuf; 2432 if (size == 1) nz -= extra_rows; 2433 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2434 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2435 2436 /* insert into matrix */ 2437 jj = rstart*bs; 2438 for ( i=0; i<m; i++ ) { 2439 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2440 mycols += locrowlens[i]; 2441 vals += locrowlens[i]; 2442 jj++; 2443 } 2444 /* read in other processors (except the last one) and ship out */ 2445 for ( i=1; i<size-1; i++ ) { 2446 nz = procsnz[i]; 2447 vals = buf; 2448 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2449 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2450 } 2451 /* the last proc */ 2452 if ( size != 1 ){ 2453 nz = procsnz[i] - extra_rows; 2454 vals = buf; 2455 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2456 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2457 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2458 } 2459 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2460 } else { 2461 /* receive numeric values */ 2462 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf); 2463 2464 /* receive message of values*/ 2465 vals = buf; 2466 mycols = ibuf; 2467 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2468 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2469 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2470 2471 /* insert into matrix */ 2472 jj = rstart*bs; 2473 for ( i=0; i<m; i++ ) { 2474 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2475 mycols += locrowlens[i]; 2476 vals += locrowlens[i]; 2477 jj++; 2478 } 2479 } 2480 ierr = 
PetscFree(locrowlens);CHKERRQ(ierr); 2481 ierr = PetscFree(buf);CHKERRQ(ierr); 2482 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2483 ierr = PetscFree(rowners);CHKERRQ(ierr); 2484 ierr = PetscFree(dlens);CHKERRQ(ierr); 2485 ierr = PetscFree(mask);CHKERRQ(ierr); 2486 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2487 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 PetscFunctionReturn(0); 2489 } 2490 2491 #undef __FUNC__ 2492 #define __FUNC__ "MatMPIBAIJSetHashTableFactor" 2493 /*@ 2494 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2495 2496 Input Parameters: 2497 . mat - the matrix 2498 . fact - factor 2499 2500 Collective on Mat 2501 2502 Level: advanced 2503 2504 Notes: 2505 This can also be set by the command line option: -mat_use_hash_table fact 2506 2507 .keywords: matrix, hashtable, factor, HT 2508 2509 .seealso: MatSetOption() 2510 @*/ 2511 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact) 2512 { 2513 Mat_MPIBAIJ *baij; 2514 2515 PetscFunctionBegin; 2516 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2517 if (mat->type != MATMPIBAIJ) { 2518 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2519 } 2520 baij = (Mat_MPIBAIJ*) mat->data; 2521 baij->ht_fact = fact; 2522 PetscFunctionReturn(0); 2523 } 2524