/*$Id: mpisbaij.c,v 1.23 2000/09/27 21:14:30 balay Exp bsmith $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "petscmat.h" I*/
#include "src/vec/vecimpl.h"
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

extern int MatSetUpMultiply_MPISBAIJ(Mat);
extern int DisAssemble_MPISBAIJ(Mat);
extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqSBAIJ(Mat);
extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
extern int MatZeroRows_SeqBAIJ(Mat,IS,Scalar *);

/* UGLY, ugly, ugly
   When MatScalar == Scalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise the ..._MatScalar() routines take matrix elements in single
   precision and insert them directly into the single precision data structures, while
   MatSetValuesBlocked_MPISBAIJ() converts the entries into single precision and then
   calls ..._MatScalar() to put them into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name="MatStoreValues_MPISBAIJ"></a>*/"MatStoreValues_MPISBAIJ"
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name="MatRetrieveValues_MPISBAIJ"></a>*/"MatRetrieveValues_MPISBAIJ"
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
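/*
   A note on the colmap convention (inferred from the lookups in MatSetValues
   and MatGetValues below): for each global block column present in the
   off-diagonal part B, colmap[] (or the PETSC_USE_CTABLE variant) is meant to
   store the local block column index PLUS ONE, so that a stored value of zero
   means "not present on this process".  That is why the lookups below subtract
   one and treat a negative result as "not found".
*/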
#undef __FUNC__
#define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private"
static int CreateColmap_MPISBAIJ_Private(Mat mat)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#define CHUNKSIZE  10

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
      new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      /* note: zero a MatScalar-sized region (not Scalar), matching the allocation above */ \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      a->s_maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->s_nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
      new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
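/*
   A note on the two macros above (a description of the existing logic, not a
   specification): both locate the block column with a bisection that narrows
   the window to at most three entries followed by a linear scan, update the
   value in place when the block already exists, and otherwise reallocate the
   whole matrix with CHUNKSIZE extra block slots before shifting the trailing
   entries up to open a slot.  The A variant works on the diagonal block
   (SeqSBAIJ), the B variant on the off-diagonal block (SeqBAIJ).
*/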
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ"
int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  int          ierr,i,N = m*n;
  MatScalar    *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  int          ierr,i,N = m*n*b->bs2;
  MatScalar    *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_MatScalar"></a>*/"MatSetValues_MPISBAIJ_MatScalar"
int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  int          ierr,i,j,row,col;
  int          roworiented = baij->roworiented,rstart_orig=baij->rstart_bs;
  int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int          cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
  int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar    *ba=b->a;

  int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int          low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar    *ap,*bap;

  /* for stash */
  int          n_loc,*in_loc = 0;
  MatScalar    *v_loc = 0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    in_loc = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(in_loc);
    v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar));CHKPTRQ(v_loc);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor's entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;           /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;                 /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_MatScalar"></a>*/"MatSetValuesBlocked_MPISBAIJ_MatScalar"
int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatGetValues_MPISBAIJ"></a>*/"MatGetValues_MPISBAIJ"
int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
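/*
   A note on the Frobenius norm computed below (a reading of the code, assuming
   MatNorm on the SeqSBAIJ block already accounts for its implicit lower
   triangle): baij->A holds the symmetric diagonal block and baij->B the
   off-diagonal blocks to the right of it, so the global norm is
   sqrt(sum ||A_p||_F^2 + 2*sum ||B_p||_F^2), the factor 2 supplying the
   mirrored lower triangular entries; that is what the Allreduce assembles.
*/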
#undef __FUNC__
#define __FUNC__ /*<a name="MatNorm_MPISBAIJ"></a>*/"MatNorm_MPISBAIJ"
int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
  /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
  int          ierr;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      /*
      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
      */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
      PetscSynchronizedFlush(PETSC_COMM_WORLD);
      */
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr  = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNC__
#define __FUNC__ /*<a name="MatCreateHashTable_MPISBAIJ_Private"></a>*/"MatCreateHashTable_MPISBAIJ_Private"
int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatAssemblyBegin_MPISBAIJ"></a>*/"MatAssemblyBegin_MPISBAIJ"
int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
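/*
   MatAssemblyBegin above ships stashed off-process entries in two batches:
   mat->stash holds scalar entries addressed by point indices (hence the
   rowners_bs ownership ranges), while mat->bstash holds blocked entries
   addressed by block indices (hence rowners).  MatAssemblyEnd below drains
   both stashes in the same order.
*/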
#undef __FUNC__
#define __FUNC__ /*<a name="MatAssemblyEnd_MPISBAIJ"></a>*/"MatAssemblyEnd_MPISBAIJ"
int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)baij->B->data;
  int          i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int          *row,*col,other_disassembled;
  PetscTruth   r1,r2,r3;
  MatScalar    *val;
  InsertMode   addv = mat->insertmode;
  int          rank;

  PetscFunctionBegin;
  /* remove the 2 lines below later */
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
      PetscSynchronizedFlush(PETSC_COMM_WORLD);
      */
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change, then we know that
     no processor disassembled, and we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatView_MPISBAIJ_ASCIIorDraworSocket"></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth   isascii,isdraw;
  Viewer       sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
                  rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
                  baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    int          M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    if (!rank) {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatView_MPISBAIJ"></a>*/"MatView_MPISBAIJ"
int MatView_MPISBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatDestroy_MPISBAIJ"></a>*/"MatDestroy_MPISBAIJ"
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatMult_MPISBAIJ"></a>*/"MatMult_MPISBAIJ"
int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != a->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatMultAdd_MPISBAIJ"></a>*/"MatMultAdd_MPISBAIJ"
int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
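/*
   A note on the two multiply routines above: since only the upper triangle is
   stored, y = A*x is assembled from three pieces: the diagonal block product
   A_diag*x_local (baij->A applies its own implicit lower triangle), the
   superdiagonal part B*x_ghost, and, by symmetry, the subdiagonal part
   obtained by computing B^T*x_local and scatter-adding the result back to the
   owning processes.
*/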
#undef __FUNC__
#define __FUNC__ /*<a name="MatMultTranspose_MPISBAIJ"></a>*/"MatMultTranspose_MPISBAIJ"
int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMult().");
  /* PetscFunctionReturn(0); */
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatMultTransposeAdd_MPISBAIJ"></a>*/"MatMultTransposeAdd_MPISBAIJ"
int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
  /* PetscFunctionReturn(0); */
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ /*<a name="MatGetDiagonal_MPISBAIJ"></a>*/"MatGetDiagonal_MPISBAIJ"
int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatScale_MPISBAIJ"></a>*/"MatScale_MPISBAIJ"
int MatScale_MPISBAIJ(Scalar *aa,Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatGetSize_MPISBAIJ"></a>*/"MatGetSize_MPISBAIJ"
int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatGetLocalSize_MPISBAIJ"></a>*/"MatGetLocalSize_MPISBAIJ"
int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatGetOwnershipRange_MPISBAIJ"></a>*/"MatGetOwnershipRange_MPISBAIJ"
int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart*mat->bs;
  if (n) *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}
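/*
   MatGetRow below returns one global row by fetching the corresponding local
   row of both A and B and merging the results into increasing global column
   order: the B entries whose global block columns fall before the diagonal
   block come first, then all A entries, then the remaining B entries.
   mat->rowvalues is sized once for the longest local row, so the merge needs
   no per-call allocation.
*/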
#undef __FUNC__
#define __FUNC__ /*<a name="MatGetRow_MPISBAIJ"></a>*/"MatGetRow_MPISBAIJ"
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  Scalar       *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int          max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues  = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatRestoreRow_MPISBAIJ"></a>*/"MatRestoreRow_MPISBAIJ"
int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatGetBlockSize_MPISBAIJ"></a>*/"MatGetBlockSize_MPISBAIJ"
int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatZeroEntries_MPISBAIJ"></a>*/"MatZeroEntries_MPISBAIJ"
int MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatGetInfo_MPISBAIJ"></a>*/"MatGetInfo_MPISBAIJ"
int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
  Mat          A = a->A,B = a->B;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (double)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (double)a->M;
  info->columns_global    = (double)a->N;
  info->rows_local        = (double)a->m;
  info->columns_local     = (double)a->n; /* local column count, not the global one */
  info->fill_ratio_given  = 0;            /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetOption_MPISBAIJ"></a>*/"MatSetOption_MPISBAIJ"
int MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_KEEP_ZEROED_ROWS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR) {
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = PETSC_TRUE;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  } else if (op == MAT_USE_HASH_TABLE) {
    /* note: MAT_USE_HASH_TABLE must not also appear in the ignored-option list
       above, otherwise this branch would be unreachable */
    a->ht_flag = PETSC_TRUE;
  } else {
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatTranspose_MPISBAIJ"></a>*/"MatTranspose_MPISBAIJ"
int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
  /* PetscFunctionReturn(0); */
}
#undef __FUNC__
#define __FUNC__ /*<a name="MatDiagonalScale_MPISBAIJ"></a>*/"MatDiagonalScale_MPISBAIJ"
int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat          a = baij->A,b = baij->B;
  int          ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll != rr) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1 != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    /*} if (ll) { */
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1 != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
    /* } */
    /* scale the diagonal block */
    ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

    /* if (rr) { */
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatZeroRows_MPISBAIJ"></a>*/"MatZeroRows_MPISBAIJ"
int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work,row;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i]; }

  /* inform other processors of number of messages and max length */
  work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);
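  /*
     For orientation (a reading of the code above and below, not new logic):
     this is the usual PETSc rendezvous pattern.  The rows were binned by
     owning processor above; the Allreduce with PetscMaxSum_Op just computed,
     for each process, the maximum incoming message length (first half of
     work) and the number of messages it will receive (second half); the row
     indices are now exchanged with nonblocking sends and receives before the
     local rows are zeroed.
  */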
  /* post receives: */
  rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1]; }
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /* wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory and thus calling lots of mallocs and slowing
     things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatPrintHelp_MPISBAIJ"></a>*/"MatPrintHelp_MPISBAIJ"
int MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  MPI_Comm     comm = A->comm;
  static int   called = 0;
  int          ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetUnfactored_MPISBAIJ"></a>*/"MatSetUnfactored_MPISBAIJ"
int MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNC__
#define __FUNC__ /*<a name="MatEqual_MPISBAIJ"></a>*/"MatEqual_MPISBAIJ"
int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat          a,b,c,d;
  PetscTruth   flg;
  int          ierr;

  PetscFunctionBegin;
  if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
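/*
   A note on the table below: struct _MatOps is a positional function table,
   so the entries must appear in exactly the order the struct declares them,
   and a 0 marks an operation this format does not (yet) implement; PETSc
   raises a "not supported" error if such an operation is invoked.
*/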
static struct _MatOps MatOps_Values = {
  MatSetValues_MPISBAIJ,
  MatGetRow_MPISBAIJ,
  MatRestoreRow_MPISBAIJ,
  MatMult_MPISBAIJ,
  MatMultAdd_MPISBAIJ,
  MatMultTranspose_MPISBAIJ,
  MatMultTransposeAdd_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPISBAIJ,
  MatGetInfo_MPISBAIJ,
  MatEqual_MPISBAIJ,
  MatGetDiagonal_MPISBAIJ,
  MatDiagonalScale_MPISBAIJ,
  MatNorm_MPISBAIJ,
  MatAssemblyBegin_MPISBAIJ,
  MatAssemblyEnd_MPISBAIJ,
  0,
  MatSetOption_MPISBAIJ,
  MatZeroEntries_MPISBAIJ,
  MatZeroRows_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPISBAIJ,
  MatGetLocalSize_MPISBAIJ,
  MatGetOwnershipRange_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPISBAIJ,
  MatIncreaseOverlap_MPISBAIJ,
  MatGetValues_MPISBAIJ,
  0,
  MatPrintHelp_MPISBAIJ,
  MatScale_MPISBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPISBAIJ,
  0,
  MatSetValuesBlocked_MPISBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name="MatGetDiagonalBlock_MPISBAIJ"></a>*/"MatGetDiagonalBlock_MPISBAIJ"
int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ /*<a name="MatCreateMPISBAIJ"></a>*/"MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each
           block row) or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the
           local submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance the user should
   preallocate the matrix storage by setting the parameters d_nz (or d_nnz) and
   o_nz (or o_nnz).  By setting these parameters accurately, performance can be
   increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local 
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.  You must leave room for the diagonal
           entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the
           local submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the 
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use
-  -mat_mpi - use the parallel matrix data structures even on one processor 
              (defaults to using SeqSBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d submatrix is stored in
   the MATSEQSBAIJ format and the o submatrix in the MATSEQBAIJ format,
   both using compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
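   As a minimal illustrative sketch (the sizes and per-row counts here are made
   up for this example and are not part of the interface): a matrix with 12
   global rows, bs=1, at most 3 block nonzeros per row expected in the diagonal
   portion and 2 in the off-diagonal portion could be created with

.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,1,PETSC_DECIDE,PETSC_DECIDE,12,12,
                               3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve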
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/

int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPISBAIJ *b;
  int          ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size;
  PetscTruth   flag1,flag2,flg;

  PetscFunctionBegin;
  if (M != N || m != n) { /* N and n are not used after this */
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, set M=N and m=n");
  }
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either M or m should be specified");
  }
  /* if both m and M are given, m takes precedence; force M to be recomputed from the local sizes */
  if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;

  if (M == PETSC_DECIDE) {
    work[0] = m; mbs = m/bs;
    ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    M    = sum[0]; Mbs = M/bs;
  } else { /* M is specified */
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global rows must be divisible by blocksize");
    /* distribute the block rows as evenly as possible over the processors */
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }

  if (mbs*bs != m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"No of local rows/cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = m; B->n = m;
  b->N = M; B->N = M;
  b->M = M; B->M = M;

  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = mbs;
  b->Mbs = Mbs;
  b->Nbs = Mbs;

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  b->cstart    = b->rstart;
  b->cend      = b->rend;
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->lensetvalues  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPISBAIJ",
                                           MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPISBAIJ",
                                           MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPISBAIJ",
                                           MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
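/*
   A minimal end-to-end usage sketch of the routine above (illustrative only,
   not part of the original source; the dimensions, block size and the inserted
   block are made up, and error handling is abbreviated):

      Mat    C;
      int    ierr,row = 0,col = 0;
      Scalar v[4] = {4.0,1.0,1.0,4.0};

      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,8,8,
                               2,PETSC_NULL,1,PETSC_NULL,&C);CHKERRQ(ierr);
      ierr = MatSetValuesBlocked(C,1,&row,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   The 2x2 block inserted at block position (0,0) is symmetric, so the row/column
   orientation of v does not matter in this particular example.
*/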
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ"
static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  int          ierr,len=0;
  PetscTruth   flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a);
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPISBAIJ;
  mat->ops->view    = MatView_MPISBAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
  a->cowners    = a->rowners + a->size + 2;
  a->rowners_bs = a->cowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  /* copy the composed functions of the original matrix into the duplicate */
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ"
int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /* 
     This code adds extra rows to make sure the number of rows is 
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs      = Mbs/size + ((Mbs % size) > rank);
  m        = mbs * bs;
  rowners  = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr     = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the parts for all the other processors (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1  = mask    + Mbs;
  masked2  = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block column index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the parts for all the other processors (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
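/*
   Illustrative only (not part of the original source): a rough sketch of how a
   binary file might be loaded through the routine above, assuming the
   ViewerBinaryOpen() interface of this PETSc generation and a made-up file name.

      Viewer viewer;
      Mat    A;
      int    ierr;

      ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer);CHKERRQ(ierr);

   The block size used while loading can be selected at run time with
   -matload_block_size <bs>.
*/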
#undef __FUNC__  
#define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor"
/*@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
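
/*
   Illustrative only (not part of the original source): until the routine above
   is implemented for SBAIJ, the hash-table assembly path is reached through the
   run-time option handled in MatCreateMPISBAIJ(), e.g.

      mpirun -np 4 ./app -mat_use_hash_table 1.6

   where the value (here 1.6, a made-up factor) scales the predicted number of
   nonzeros to obtain the hash-table size; values <= 1.0 fall back to the
   default factor of 1.39.
*/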