1 /*$Id: mpisbaij.c,v 1.26 2000/10/10 20:46:03 hzhang Exp hzhang $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqSBAIJ(Mat); 18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*); 19 extern int MatZeroRows_SeqBAIJ(Mat,IS,Scalar *); 20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 21 22 /* UGLY, ugly, ugly 23 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 24 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 25 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 26 converts the entries into single precision and then calls ..._MatScalar() to put them 27 into the single precision data structures. 28 */ 29 #if defined(PETSC_USE_MAT_SINGLE) 30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 35 #else 36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 37 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 39 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 41 #endif 42 43 EXTERN_C_BEGIN 44 #undef __FUNC__ 45 #define __FUNC__ /*<a name="MatStoreValues_MPISBAIJ"></a>*/"MatStoreValues_MPISBAIJ" 46 int MatStoreValues_MPISBAIJ(Mat mat) 47 { 48 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 49 int ierr; 50 51 PetscFunctionBegin; 52 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 53 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 EXTERN_C_END 57 58 EXTERN_C_BEGIN 59 #undef __FUNC__ 60 #define __FUNC__ /*<a name="MatRetrieveValues_MPISBAIJ"></a>*/"MatRetrieveValues_MPISBAIJ" 61 int MatRetrieveValues_MPISBAIJ(Mat mat) 62 { 63 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 64 int ierr; 65 66 PetscFunctionBegin; 67 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 68 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 EXTERN_C_END 72 73 /* 74 Local utility routine that creates a mapping from the global column 75 number to the local number in the off-diagonal part of the local 76 storage of the matrix. This is done in a non scable way since the 77 length of colmap equals the global matrix length. 78 */ 79 #undef __FUNC__ 80 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private" 81 static int CreateColmap_MPISBAIJ_Private(Mat mat) 82 { 83 PetscFunctionBegin; 84 SETERRQ(1,"Function not yet written for SBAIJ format"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #define CHUNKSIZE 10 89 90 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 91 { \ 92 \ 93 brow = row/bs; \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow]; nrow = ailen[brow]; \ 96 bcol = col/bs; \ 97 ridx = row % bs; cidx = col % bs; \ 98 low = 0; high = nrow; \ 99 while (high-low > 3) { \ 100 t = (low+high)/2; \ 101 if (rp[t] > bcol) high = t; \ 102 else low = t; \ 103 } \ 104 for (_i=low; _i<high; _i++) { \ 105 if (rp[_i] > bcol) break; \ 106 if (rp[_i] == bcol) { \ 107 bap = ap + bs2*_i + bs*cidx + ridx; \ 108 if (addv == ADD_VALUES) *bap += value; \ 109 else *bap = value; \ 110 goto a_noinsert; \ 111 } \ 112 } \ 113 if (a->nonew == 1) goto a_noinsert; \ 114 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 115 if (nrow >= rmax) { \ 116 /* there is no extra room in row, therefore enlarge */ \ 117 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 118 MatScalar *new_a; \ 119 \ 120 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 121 \ 122 /* malloc new storage space */ \ 123 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 124 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 125 new_j = (int*)(new_a + bs2*new_nz); \ 126 new_i = new_j + new_nz; \ 127 \ 128 /* copy over old data into new slots */ \ 129 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 130 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 131 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 132 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 133 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 135 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 137 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 138 /* free up old matrix storage */ \ 139 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 140 if (!a->singlemalloc) { \ 141 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 142 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 143 } \ 144 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 145 a->singlemalloc = PETSC_TRUE; \ 146 \ 147 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 148 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 149 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 150 a->s_maxnz += bs2*CHUNKSIZE; \ 151 a->reallocs++; \ 152 a->s_nz++; \ 153 } \ 154 N = nrow++ - 1; \ 155 /* shift up all the later entries in this row */ \ 156 for (ii=N; ii>=_i; ii--) { \ 157 rp[ii+1] = rp[ii]; \ 158 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 159 } \ 160 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 161 rp[_i] = bcol; \ 162 ap[bs2*_i + bs*cidx + ridx] = value; \ 163 a_noinsert:; \ 164 ailen[brow] = nrow; \ 165 } 166 #ifndef MatSetValues_SeqBAIJ_B_Private 167 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 168 { \ 169 brow = row/bs; \ 170 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171 rmax = bimax[brow]; nrow = bilen[brow]; \ 172 bcol = col/bs; \ 173 ridx = row % bs; cidx = col % bs; \ 174 low = 0; high = nrow; \ 175 while (high-low > 3) { \ 176 t = (low+high)/2; \ 177 if (rp[t] > bcol) high = t; \ 178 else low = t; \ 179 } \ 180 for (_i=low; _i<high; _i++) { \ 181 if (rp[_i] > bcol) break; \ 182 if (rp[_i] == bcol) { \ 183 bap = ap + bs2*_i + bs*cidx + ridx; \ 184 if (addv == ADD_VALUES) *bap += value; \ 185 else *bap = value; \ 186 goto b_noinsert; \ 187 } \ 188 } \ 189 if (b->nonew == 1) goto b_noinsert; \ 190 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 191 if (nrow >= rmax) { \ 192 /* there is no extra room in row, therefore enlarge */ \ 193 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 194 MatScalar *new_a; \ 195 \ 196 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 197 \ 198 /* malloc new storage space */ \ 199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 200 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 201 new_j = (int*)(new_a + bs2*new_nz); \ 202 new_i = new_j + new_nz; \ 203 \ 204 /* copy over old data into new slots */ \ 205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 206 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 207 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 208 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 209 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 211 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 213 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 214 /* free up old matrix storage */ \ 215 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 216 if (!b->singlemalloc) { \ 217 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 218 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 219 } \ 220 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 221 b->singlemalloc = PETSC_TRUE; \ 222 \ 223 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 224 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 225 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 226 b->maxnz += bs2*CHUNKSIZE; \ 227 b->reallocs++; \ 228 b->nz++; \ 229 } \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 for (ii=N; ii>=_i; ii--) { \ 233 rp[ii+1] = rp[ii]; \ 234 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 235 } \ 236 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 237 rp[_i] = bcol; \ 238 ap[bs2*_i + bs*cidx + ridx] = value; \ 239 b_noinsert:; \ 240 bilen[brow] = nrow; \ 241 } 242 #endif 243 244 #if defined(PETSC_USE_MAT_SINGLE) 245 #undef __FUNC__ 246 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ" 247 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 248 { 249 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 250 int ierr,i,N = m*n; 251 MatScalar *vsingle; 252 253 PetscFunctionBegin; 254 if (N > b->setvalueslen) { 255 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 256 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 257 b->setvalueslen = N; 258 } 259 vsingle = b->setvaluescopy; 260 261 for (i=0; i<N; i++) { 262 vsingle[i] = v[i]; 263 } 264 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNC__ 269 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 271 { 272 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 273 int ierr,i,N = m*n*b->bs2; 274 MatScalar *vsingle; 275 276 PetscFunctionBegin; 277 if (N > b->setvalueslen) { 278 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 279 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 280 b->setvalueslen = N; 281 } 282 vsingle = b->setvaluescopy; 283 for (i=0; i<N; i++) { 284 vsingle[i] = v[i]; 285 } 286 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNC__ 291 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT" 292 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 293 { 294 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 295 int ierr,i,N = m*n; 296 MatScalar *vsingle; 297 298 PetscFunctionBegin; 299 SETERRQ(1,"Function not yet written for SBAIJ format"); 300 /* PetscFunctionReturn(0); */ 301 } 302 303 #undef __FUNC__ 304 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT" 305 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 306 { 307 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 308 int ierr,i,N = m*n*b->bs2; 309 MatScalar *vsingle; 310 311 PetscFunctionBegin; 312 SETERRQ(1,"Function not yet written for SBAIJ format"); 313 /* PetscFunctionReturn(0); */ 314 } 315 #endif 316 317 /* Only add/insert a(i,j) with i<=j (blocks). 318 Any a(i,j) with i>j input by user is ingored. 319 */ 320 #undef __FUNC__ 321 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 325 MatScalar value; 326 int ierr,i,j,row,col; 327 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 328 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 329 int cend_orig=baij->cend_bs,bs=baij->bs; 330 331 /* Some Variables required in the macro */ 332 Mat A = baij->A; 333 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 334 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 335 MatScalar *aa=a->a; 336 337 Mat B = baij->B; 338 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 339 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 340 MatScalar *ba=b->a; 341 342 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 343 int low,high,t,ridx,cidx,bs2=a->bs2; 344 MatScalar *ap,*bap; 345 346 /* for stash */ 347 int n_loc, *in_loc=0; 348 MatScalar *v_loc=0; 349 350 PetscFunctionBegin; 351 352 if(!baij->donotstash){ 353 in_loc = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(in_loc); 354 v_loc = (MatScalar*)PetscMalloc(n*sizeof(MatScalar));CHKPTRQ(v_loc); 355 } 356 357 for (i=0; i<m; i++) { 358 if (im[i] < 0) continue; 359 #if defined(PETSC_USE_BOPT_g) 360 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 361 #endif 362 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 363 row = im[i] - rstart_orig; /* local row index */ 364 for (j=0; j<n; j++) { 365 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 366 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 367 col = in[j] - cstart_orig; /* local col index */ 368 brow = row/bs; bcol = col/bs; 369 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 370 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 371 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 372 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 373 } else if (in[j] < 0) continue; 374 #if defined(PETSC_USE_BOPT_g) 375 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 376 #endif 377 else { /* off-diag entry (B) */ 378 if (mat->was_assembled) { 379 if (!baij->colmap) { 380 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 381 } 382 #if defined (PETSC_USE_CTABLE) 383 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 384 col = col - 1 + in[j]%bs; 385 #else 386 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 387 #endif 388 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 389 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 390 col = in[j]; 391 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 392 B = baij->B; 393 b = (Mat_SeqBAIJ*)(B)->data; 394 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 395 ba=b->a; 396 } 397 } else col = in[j]; 398 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 399 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 400 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 401 } 402 } 403 } else { /* off processor entry */ 404 if (!baij->donotstash) { 405 n_loc = 0; 406 for (j=0; j<n; j++){ 407 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 408 in_loc[n_loc] = in[j]; 409 if (roworiented) { 410 v_loc[n_loc] = v[i*n+j]; 411 } else { 412 v_loc[n_loc] = v[j*m+i]; 413 } 414 n_loc++; 415 } 416 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 417 } 418 } 419 } 420 421 if(!baij->donotstash){ 422 ierr = PetscFree(in_loc);CHKERRQ(ierr); 423 ierr = PetscFree(v_loc);CHKERRQ(ierr); 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNC__ 429 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 430 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 431 { 432 PetscFunctionBegin; 433 SETERRQ(1,"Function not yet written for SBAIJ format"); 434 /* PetscFunctionReturn(0); */ 435 } 436 437 #define HASH_KEY 0.6180339887 438 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 439 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 440 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 441 #undef __FUNC__ 442 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar" 443 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 444 { 445 PetscFunctionBegin; 446 SETERRQ(1,"Function not yet written for SBAIJ format"); 447 /* PetscFunctionReturn(0); */ 448 } 449 450 #undef __FUNC__ 451 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 452 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 453 { 454 PetscFunctionBegin; 455 SETERRQ(1,"Function not yet written for SBAIJ format"); 456 /* PetscFunctionReturn(0); */ 457 } 458 459 #undef __FUNC__ 460 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ" 461 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 462 { 463 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 464 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 465 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 466 467 PetscFunctionBegin; 468 for (i=0; i<m; i++) { 469 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 470 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 471 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 472 row = idxm[i] - bsrstart; 473 for (j=0; j<n; j++) { 474 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 475 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 476 if (idxn[j] >= bscstart && idxn[j] < bscend){ 477 col = idxn[j] - bscstart; 478 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 479 } else { 480 if (!baij->colmap) { 481 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 482 } 483 #if defined (PETSC_USE_CTABLE) 484 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 485 data --; 486 #else 487 data = baij->colmap[idxn[j]/bs]-1; 488 #endif 489 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 490 else { 491 col = data + idxn[j]%bs; 492 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 493 } 494 } 495 } 496 } else { 497 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 498 } 499 } 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNC__ 504 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ" 505 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 506 { 507 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 508 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 509 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 510 int ierr; 511 PetscReal sum[2],*lnorm2; 512 513 PetscFunctionBegin; 514 if (baij->size == 1) { 515 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 516 } else { 517 if (type == NORM_FROBENIUS) { 518 lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2); 519 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 520 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 521 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 522 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 523 /* 524 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 525 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 526 */ 527 ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 528 /* 529 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 530 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 531 532 *norm = sqrt(sum[0] + 2*sum[1]); 533 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 534 } else { 535 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 536 } 537 } 538 PetscFunctionReturn(0); 539 } 540 541 /* 542 Creates the hash table, and sets the table 543 This table is created only once. 544 If new entried need to be added to the matrix 545 then the hash table has to be destroyed and 546 recreated. 547 */ 548 #undef __FUNC__ 549 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private" 550 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 551 { 552 PetscFunctionBegin; 553 SETERRQ(1,"Function not yet written for SBAIJ format"); 554 /* PetscFunctionReturn(0); */ 555 } 556 557 #undef __FUNC__ 558 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ" 559 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 560 { 561 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 562 int ierr,nstash,reallocs; 563 InsertMode addv; 564 565 PetscFunctionBegin; 566 if (baij->donotstash) { 567 PetscFunctionReturn(0); 568 } 569 570 /* make sure all processors are either in INSERTMODE or ADDMODE */ 571 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 572 if (addv == (ADD_VALUES|INSERT_VALUES)) { 573 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 574 } 575 mat->insertmode = addv; /* in case this processor had no cache */ 576 577 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 578 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 579 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 580 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 581 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 582 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNC__ 587 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ" 588 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 589 { 590 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 591 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 592 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 593 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 594 int *row,*col,other_disassembled; 595 PetscTruth r1,r2,r3; 596 MatScalar *val; 597 InsertMode addv = mat->insertmode; 598 int rank; 599 600 PetscFunctionBegin; 601 /* remove 2 line below later */ 602 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 603 604 if (!baij->donotstash) { 605 while (1) { 606 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 607 /* 608 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 609 PetscSynchronizedFlush(PETSC_COMM_WORLD); 610 */ 611 if (!flg) break; 612 613 for (i=0; i<n;) { 614 /* Now identify the consecutive vals belonging to the same row */ 615 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 616 if (j < n) ncols = j-i; 617 else ncols = n-i; 618 /* Now assemble all these values with a single function call */ 619 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 620 i = j; 621 } 622 } 623 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 624 /* Now process the block-stash. Since the values are stashed column-oriented, 625 set the roworiented flag to column oriented, and after MatSetValues() 626 restore the original flags */ 627 r1 = baij->roworiented; 628 r2 = a->roworiented; 629 r3 = b->roworiented; 630 baij->roworiented = PETSC_FALSE; 631 a->roworiented = PETSC_FALSE; 632 b->roworiented = PETSC_FALSE; 633 while (1) { 634 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 635 if (!flg) break; 636 637 for (i=0; i<n;) { 638 /* Now identify the consecutive vals belonging to the same row */ 639 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 640 if (j < n) ncols = j-i; 641 else ncols = n-i; 642 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 643 i = j; 644 } 645 } 646 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 647 baij->roworiented = r1; 648 a->roworiented = r2; 649 b->roworiented = r3; 650 } 651 652 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 653 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 654 655 /* determine if any processor has disassembled, if so we must 656 also disassemble ourselfs, in order that we may reassemble. */ 657 /* 658 if nonzero structure of submatrix B cannot change then we know that 659 no processor disassembled thus we can skip this stuff 660 */ 661 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 662 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 663 if (mat->was_assembled && !other_disassembled) { 664 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 665 } 666 } 667 668 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 669 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); 670 } 671 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 672 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 673 674 #if defined(PETSC_USE_BOPT_g) 675 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 676 PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 677 baij->ht_total_ct = 0; 678 baij->ht_insert_ct = 0; 679 } 680 #endif 681 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 682 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 683 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 684 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 685 } 686 687 if (baij->rowvalues) { 688 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 689 baij->rowvalues = 0; 690 } 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNC__ 695 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket" 696 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 697 { 698 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 699 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 700 PetscTruth isascii,isdraw; 701 Viewer sviewer; 702 703 PetscFunctionBegin; 704 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 705 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 706 if (isascii) { 707 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 708 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 709 MatInfo info; 710 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 711 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 712 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 713 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 714 baij->bs,(int)info.memory);CHKERRQ(ierr); 715 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 716 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 717 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 718 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 719 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 720 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 721 PetscFunctionReturn(0); 722 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 723 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 } 727 728 if (isdraw) { 729 Draw draw; 730 PetscTruth isnull; 731 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 732 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 733 } 734 735 if (size == 1) { 736 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 737 } else { 738 /* assemble the entire matrix onto first processor. */ 739 Mat A; 740 Mat_SeqSBAIJ *Aloc; 741 Mat_SeqBAIJ *Bloc; 742 int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 743 MatScalar *a; 744 745 if (!rank) { 746 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 747 } else { 748 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 749 } 750 PLogObjectParent(mat,A); 751 752 /* copy over the A part */ 753 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 754 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 755 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 756 757 for (i=0; i<mbs; i++) { 758 rvals[0] = bs*(baij->rstart + i); 759 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 760 for (j=ai[i]; j<ai[i+1]; j++) { 761 col = (baij->cstart+aj[j])*bs; 762 for (k=0; k<bs; k++) { 763 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 764 col++; a += bs; 765 } 766 } 767 } 768 /* copy over the B part */ 769 Bloc = (Mat_SeqBAIJ*)baij->B->data; 770 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 771 for (i=0; i<mbs; i++) { 772 rvals[0] = bs*(baij->rstart + i); 773 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 774 for (j=ai[i]; j<ai[i+1]; j++) { 775 col = baij->garray[aj[j]]*bs; 776 for (k=0; k<bs; k++) { 777 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 778 col++; a += bs; 779 } 780 } 781 } 782 ierr = PetscFree(rvals);CHKERRQ(ierr); 783 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 784 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 785 /* 786 Everyone has to call to draw the matrix since the graphics waits are 787 synchronized across all processors that share the Draw object 788 */ 789 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 790 if (!rank) { 791 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 792 } 793 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 794 ierr = MatDestroy(A);CHKERRQ(ierr); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNC__ 800 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ" 801 int MatView_MPISBAIJ(Mat mat,Viewer viewer) 802 { 803 int ierr; 804 PetscTruth isascii,isdraw,issocket,isbinary; 805 806 PetscFunctionBegin; 807 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 808 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 809 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 810 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 811 if (isascii || isdraw || issocket || isbinary) { 812 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 813 } else { 814 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNC__ 820 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ" 821 int MatDestroy_MPISBAIJ(Mat mat) 822 { 823 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 824 int ierr; 825 826 PetscFunctionBegin; 827 828 if (mat->mapping) { 829 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 830 } 831 if (mat->bmapping) { 832 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 833 } 834 if (mat->rmap) { 835 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 836 } 837 if (mat->cmap) { 838 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 839 } 840 #if defined(PETSC_USE_LOG) 841 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 842 #endif 843 844 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 845 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 846 847 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 848 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 849 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 850 #if defined (PETSC_USE_CTABLE) 851 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 852 #else 853 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 854 #endif 855 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 856 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 857 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 858 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 859 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 860 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 861 #if defined(PETSC_USE_MAT_SINGLE) 862 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 863 #endif 864 ierr = PetscFree(baij);CHKERRQ(ierr); 865 PLogObjectDestroy(mat); 866 PetscHeaderDestroy(mat); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNC__ 871 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ" 872 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 873 { 874 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 875 int ierr,nt; 876 877 PetscFunctionBegin; 878 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 879 if (nt != a->n) { 880 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 881 } 882 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 883 if (nt != a->m) { 884 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 885 } 886 887 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 888 /* do diagonal part */ 889 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 890 /* do supperdiagonal part */ 891 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 892 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 893 /* do subdiagonal part */ 894 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 895 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 896 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 897 898 PetscFunctionReturn(0); 899 } 900 901 #undef __FUNC__ 902 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ" 903 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 904 { 905 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 906 int ierr; 907 908 PetscFunctionBegin; 909 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 910 /* do diagonal part */ 911 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 912 /* do supperdiagonal part */ 913 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 914 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 915 916 /* do subdiagonal part */ 917 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 918 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 919 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 920 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNC__ 925 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ" 926 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 927 { 928 PetscFunctionBegin; 929 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 930 /* PetscFunctionReturn(0); */ 931 } 932 933 #undef __FUNC__ 934 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ" 935 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 936 { 937 PetscFunctionBegin; 938 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 939 /* PetscFunctionReturn(0); */ 940 } 941 942 /* 943 This only works correctly for square matrices where the subblock A->A is the 944 diagonal block 945 */ 946 #undef __FUNC__ 947 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ" 948 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 949 { 950 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 951 int ierr; 952 953 PetscFunctionBegin; 954 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 955 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 956 PetscFunctionReturn(0); 957 } 958 959 #undef __FUNC__ 960 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ" 961 int MatScale_MPISBAIJ(Scalar *aa,Mat A) 962 { 963 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 964 int ierr; 965 966 PetscFunctionBegin; 967 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 968 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNC__ 973 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ" 974 int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n) 975 { 976 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 977 978 PetscFunctionBegin; 979 if (m) *m = mat->M; 980 if (n) *n = mat->N; 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNC__ 985 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ" 986 int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n) 987 { 988 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 989 990 PetscFunctionBegin; 991 *m = mat->m; *n = mat->n; 992 PetscFunctionReturn(0); 993 } 994 995 #undef __FUNC__ 996 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ" 997 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n) 998 { 999 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1000 1001 PetscFunctionBegin; 1002 if (m) *m = mat->rstart*mat->bs; 1003 if (n) *n = mat->rend*mat->bs; 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNC__ 1008 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ" 1009 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1010 { 1011 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1012 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1013 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1014 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1015 int *cmap,*idx_p,cstart = mat->cstart; 1016 1017 PetscFunctionBegin; 1018 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1019 mat->getrowactive = PETSC_TRUE; 1020 1021 if (!mat->rowvalues && (idx || v)) { 1022 /* 1023 allocate enough space to hold information from the longest row. 1024 */ 1025 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1026 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1027 int max = 1,mbs = mat->mbs,tmp; 1028 for (i=0; i<mbs; i++) { 1029 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1030 if (max < tmp) { max = tmp; } 1031 } 1032 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1033 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1034 } 1035 1036 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1037 lrow = row - brstart; /* local row index */ 1038 1039 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1040 if (!v) {pvA = 0; pvB = 0;} 1041 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1042 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1043 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1044 nztot = nzA + nzB; 1045 1046 cmap = mat->garray; 1047 if (v || idx) { 1048 if (nztot) { 1049 /* Sort by increasing column numbers, assuming A and B already sorted */ 1050 int imark = -1; 1051 if (v) { 1052 *v = v_p = mat->rowvalues; 1053 for (i=0; i<nzB; i++) { 1054 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1055 else break; 1056 } 1057 imark = i; 1058 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1059 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1060 } 1061 if (idx) { 1062 *idx = idx_p = mat->rowindices; 1063 if (imark > -1) { 1064 for (i=0; i<imark; i++) { 1065 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1066 } 1067 } else { 1068 for (i=0; i<nzB; i++) { 1069 if (cmap[cworkB[i]/bs] < cstart) 1070 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1071 else break; 1072 } 1073 imark = i; 1074 } 1075 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1076 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1077 } 1078 } else { 1079 if (idx) *idx = 0; 1080 if (v) *v = 0; 1081 } 1082 } 1083 *nz = nztot; 1084 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1085 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNC__ 1090 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ" 1091 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1092 { 1093 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1094 1095 PetscFunctionBegin; 1096 if (baij->getrowactive == PETSC_FALSE) { 1097 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1098 } 1099 baij->getrowactive = PETSC_FALSE; 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNC__ 1104 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ" 1105 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1106 { 1107 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1108 1109 PetscFunctionBegin; 1110 *bs = baij->bs; 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNC__ 1115 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ" 1116 int MatZeroEntries_MPISBAIJ(Mat A) 1117 { 1118 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1119 int ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1123 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNC__ 1128 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ" 1129 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1130 { 1131 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1132 Mat A = a->A,B = a->B; 1133 int ierr; 1134 PetscReal isend[5],irecv[5]; 1135 1136 PetscFunctionBegin; 1137 info->block_size = (double)a->bs; 1138 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1139 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1140 isend[3] = info->memory; isend[4] = info->mallocs; 1141 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1142 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1143 isend[3] += info->memory; isend[4] += info->mallocs; 1144 if (flag == MAT_LOCAL) { 1145 info->nz_used = isend[0]; 1146 info->nz_allocated = isend[1]; 1147 info->nz_unneeded = isend[2]; 1148 info->memory = isend[3]; 1149 info->mallocs = isend[4]; 1150 } else if (flag == MAT_GLOBAL_MAX) { 1151 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1152 info->nz_used = irecv[0]; 1153 info->nz_allocated = irecv[1]; 1154 info->nz_unneeded = irecv[2]; 1155 info->memory = irecv[3]; 1156 info->mallocs = irecv[4]; 1157 } else if (flag == MAT_GLOBAL_SUM) { 1158 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1159 info->nz_used = irecv[0]; 1160 info->nz_allocated = irecv[1]; 1161 info->nz_unneeded = irecv[2]; 1162 info->memory = irecv[3]; 1163 info->mallocs = irecv[4]; 1164 } else { 1165 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1166 } 1167 info->rows_global = (double)a->M; 1168 info->columns_global = (double)a->N; 1169 info->rows_local = (double)a->m; 1170 info->columns_local = (double)a->N; 1171 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1172 info->fill_ratio_needed = 0; 1173 info->factor_mallocs = 0; 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNC__ 1178 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ" 1179 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1180 { 1181 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1182 int ierr; 1183 1184 PetscFunctionBegin; 1185 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1186 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1187 op == MAT_COLUMNS_UNSORTED || 1188 op == MAT_COLUMNS_SORTED || 1189 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1190 op == MAT_KEEP_ZEROED_ROWS || 1191 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1192 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1193 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1194 } else if (op == MAT_ROW_ORIENTED) { 1195 a->roworiented = PETSC_TRUE; 1196 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1197 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1198 } else if (op == MAT_ROWS_SORTED || 1199 op == MAT_ROWS_UNSORTED || 1200 op == MAT_SYMMETRIC || 1201 op == MAT_STRUCTURALLY_SYMMETRIC || 1202 op == MAT_YES_NEW_DIAGONALS || 1203 op == MAT_USE_HASH_TABLE) { 1204 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1205 } else if (op == MAT_COLUMN_ORIENTED) { 1206 a->roworiented = PETSC_FALSE; 1207 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1208 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1209 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1210 a->donotstash = PETSC_TRUE; 1211 } else if (op == MAT_NO_NEW_DIAGONALS) { 1212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1213 } else if (op == MAT_USE_HASH_TABLE) { 1214 a->ht_flag = PETSC_TRUE; 1215 } else { 1216 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1217 } 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNC__ 1222 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ(" 1223 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1224 { 1225 PetscFunctionBegin; 1226 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 1227 /* PetscFunctionReturn(0); */ 1228 } 1229 1230 #undef __FUNC__ 1231 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ" 1232 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1233 { 1234 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1235 Mat a = baij->A,b = baij->B; 1236 int ierr,s1,s2,s3; 1237 1238 PetscFunctionBegin; 1239 if (ll != rr) { 1240 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1241 } 1242 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1243 if (rr) { 1244 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1245 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1246 /* Overlap communication with computation. */ 1247 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1248 /*} if (ll) { */ 1249 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1250 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1251 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1252 /* } */ 1253 /* scale the diagonal block */ 1254 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1255 1256 /* if (rr) { */ 1257 /* Do a scatter end and then right scale the off-diagonal block */ 1258 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1259 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1260 } 1261 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNC__ 1266 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ" 1267 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag) 1268 { 1269 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1270 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1271 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1272 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1273 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1274 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1275 MPI_Comm comm = A->comm; 1276 MPI_Request *send_waits,*recv_waits; 1277 MPI_Status recv_status,*send_status; 1278 IS istmp; 1279 1280 PetscFunctionBegin; 1281 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1282 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1283 1284 /* first count number of contributors to each processor */ 1285 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1286 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1287 procs = nprocs + size; 1288 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1289 for (i=0; i<N; i++) { 1290 idx = rows[i]; 1291 found = 0; 1292 for (j=0; j<size; j++) { 1293 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1294 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1295 } 1296 } 1297 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1298 } 1299 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1300 1301 /* inform other processors of number of messages and max length*/ 1302 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1303 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1304 nmax = work[rank]; 1305 nrecvs = work[size+rank]; 1306 ierr = PetscFree(work);CHKERRQ(ierr); 1307 1308 /* post receives: */ 1309 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1310 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1311 for (i=0; i<nrecvs; i++) { 1312 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1313 } 1314 1315 /* do sends: 1316 1) starts[i] gives the starting index in svalues for stuff going to 1317 the ith processor 1318 */ 1319 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1320 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1321 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1322 starts[0] = 0; 1323 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1324 for (i=0; i<N; i++) { 1325 svalues[starts[owner[i]]++] = rows[i]; 1326 } 1327 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1328 1329 starts[0] = 0; 1330 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1331 count = 0; 1332 for (i=0; i<size; i++) { 1333 if (procs[i]) { 1334 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1335 } 1336 } 1337 ierr = PetscFree(starts);CHKERRQ(ierr); 1338 1339 base = owners[rank]*bs; 1340 1341 /* wait on receives */ 1342 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1343 source = lens + nrecvs; 1344 count = nrecvs; slen = 0; 1345 while (count) { 1346 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1347 /* unpack receives into our local space */ 1348 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1349 source[imdex] = recv_status.MPI_SOURCE; 1350 lens[imdex] = n; 1351 slen += n; 1352 count--; 1353 } 1354 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1355 1356 /* move the data into the send scatter */ 1357 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1358 count = 0; 1359 for (i=0; i<nrecvs; i++) { 1360 values = rvalues + i*nmax; 1361 for (j=0; j<lens[i]; j++) { 1362 lrows[count++] = values[j] - base; 1363 } 1364 } 1365 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1366 ierr = PetscFree(lens);CHKERRQ(ierr); 1367 ierr = PetscFree(owner);CHKERRQ(ierr); 1368 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1369 1370 /* actually zap the local rows */ 1371 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1372 PLogObjectParent(A,istmp); 1373 1374 /* 1375 Zero the required rows. If the "diagonal block" of the matrix 1376 is square and the user wishes to set the diagonal we use seperate 1377 code so that MatSetValues() is not called for each diagonal allocating 1378 new memory, thus calling lots of mallocs and slowing things down. 1379 1380 Contributed by: Mathew Knepley 1381 */ 1382 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1383 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1384 if (diag && (l->A->M == l->A->N)) { 1385 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1386 } else if (diag) { 1387 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1388 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1389 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1390 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1391 } 1392 for (i=0; i<slen; i++) { 1393 row = lrows[i] + rstart_bs; 1394 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1395 } 1396 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1398 } else { 1399 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1400 } 1401 1402 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1403 ierr = PetscFree(lrows);CHKERRQ(ierr); 1404 1405 /* wait on sends */ 1406 if (nsends) { 1407 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1408 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1409 ierr = PetscFree(send_status);CHKERRQ(ierr); 1410 } 1411 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1412 ierr = PetscFree(svalues);CHKERRQ(ierr); 1413 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNC__ 1418 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ" 1419 int MatPrintHelp_MPISBAIJ(Mat A) 1420 { 1421 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1422 MPI_Comm comm = A->comm; 1423 static int called = 0; 1424 int ierr; 1425 1426 PetscFunctionBegin; 1427 if (!a->rank) { 1428 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1429 } 1430 if (called) {PetscFunctionReturn(0);} else called = 1; 1431 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1432 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNC__ 1437 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ" 1438 int MatSetUnfactored_MPISBAIJ(Mat A) 1439 { 1440 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1441 int ierr; 1442 1443 PetscFunctionBegin; 1444 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1449 1450 #undef __FUNC__ 1451 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ" 1452 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1453 { 1454 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1455 Mat a,b,c,d; 1456 PetscTruth flg; 1457 int ierr; 1458 1459 PetscFunctionBegin; 1460 if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1461 a = matA->A; b = matA->B; 1462 c = matB->A; d = matB->B; 1463 1464 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1465 if (flg == PETSC_TRUE) { 1466 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1467 } 1468 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 /* -------------------------------------------------------------------*/ 1473 static struct _MatOps MatOps_Values = { 1474 MatSetValues_MPISBAIJ, 1475 MatGetRow_MPISBAIJ, 1476 MatRestoreRow_MPISBAIJ, 1477 MatMult_MPISBAIJ, 1478 MatMultAdd_MPISBAIJ, 1479 MatMultTranspose_MPISBAIJ, 1480 MatMultTransposeAdd_MPISBAIJ, 1481 0, 1482 0, 1483 0, 1484 0, 1485 0, 1486 0, 1487 0, 1488 MatTranspose_MPISBAIJ, 1489 MatGetInfo_MPISBAIJ, 1490 MatEqual_MPISBAIJ, 1491 MatGetDiagonal_MPISBAIJ, 1492 MatDiagonalScale_MPISBAIJ, 1493 MatNorm_MPISBAIJ, 1494 MatAssemblyBegin_MPISBAIJ, 1495 MatAssemblyEnd_MPISBAIJ, 1496 0, 1497 MatSetOption_MPISBAIJ, 1498 MatZeroEntries_MPISBAIJ, 1499 MatZeroRows_MPISBAIJ, 1500 0, 1501 0, 1502 0, 1503 0, 1504 MatGetSize_MPISBAIJ, 1505 MatGetLocalSize_MPISBAIJ, 1506 MatGetOwnershipRange_MPISBAIJ, 1507 0, 1508 0, 1509 0, 1510 0, 1511 MatDuplicate_MPISBAIJ, 1512 0, 1513 0, 1514 0, 1515 0, 1516 0, 1517 MatGetSubMatrices_MPISBAIJ, 1518 MatIncreaseOverlap_MPISBAIJ, 1519 MatGetValues_MPISBAIJ, 1520 0, 1521 MatPrintHelp_MPISBAIJ, 1522 MatScale_MPISBAIJ, 1523 0, 1524 0, 1525 0, 1526 MatGetBlockSize_MPISBAIJ, 1527 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 0, 1533 MatSetUnfactored_MPISBAIJ, 1534 0, 1535 MatSetValuesBlocked_MPISBAIJ, 1536 0, 1537 0, 1538 0, 1539 MatGetMaps_Petsc, 1540 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 MatGetRowMax_MPISBAIJ}; 1547 1548 1549 EXTERN_C_BEGIN 1550 #undef __FUNC__ 1551 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ" 1552 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1553 { 1554 PetscFunctionBegin; 1555 *a = ((Mat_MPISBAIJ *)A->data)->A; 1556 *iscopy = PETSC_FALSE; 1557 PetscFunctionReturn(0); 1558 } 1559 EXTERN_C_END 1560 1561 #undef __FUNC__ 1562 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ" 1563 /*@C 1564 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1565 (block compressed row). For good matrix assembly performance 1566 the user should preallocate the matrix storage by setting the parameters 1567 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1568 performance can be increased by more than a factor of 50. 1569 1570 Collective on MPI_Comm 1571 1572 Input Parameters: 1573 + comm - MPI communicator 1574 . bs - size of blockk 1575 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1576 This value should be the same as the local size used in creating the 1577 y vector for the matrix-vector product y = Ax. 1578 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1579 This value should be the same as the local size used in creating the 1580 x vector for the matrix-vector product y = Ax. 1581 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1582 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1583 . d_nz - number of block nonzeros per block row in diagonal portion of local 1584 submatrix (same for all local rows) 1585 . d_nnz - array containing the number of block nonzeros in the various block rows 1586 of the in diagonal portion of the local (possibly different for each block 1587 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1588 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1589 submatrix (same for all local rows). 1590 - o_nnz - array containing the number of nonzeros in the various block rows of the 1591 off-diagonal portion of the local submatrix (possibly different for 1592 each block row) or PETSC_NULL. 1593 1594 Output Parameter: 1595 . A - the matrix 1596 1597 Options Database Keys: 1598 . -mat_no_unroll - uses code that does not unroll the loops in the 1599 block calculations (much slower) 1600 . -mat_block_size - size of the blocks to use 1601 . -mat_mpi - use the parallel matrix data structures even on one processor 1602 (defaults to using SeqBAIJ format on one processor) 1603 1604 Notes: 1605 The user MUST specify either the local or global matrix dimensions 1606 (possibly both). 1607 1608 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1609 than it must be used on all processors that share the object for that argument. 1610 1611 Storage Information: 1612 For a square global matrix we define each processor's diagonal portion 1613 to be its local rows and the corresponding columns (a square submatrix); 1614 each processor's off-diagonal portion encompasses the remainder of the 1615 local matrix (a rectangular submatrix). 1616 1617 The user can specify preallocated storage for the diagonal part of 1618 the local submatrix with either d_nz or d_nnz (not both). Set 1619 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1620 memory allocation. Likewise, specify preallocated storage for the 1621 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1622 1623 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1624 the figure below we depict these three local rows and all columns (0-11). 1625 1626 .vb 1627 0 1 2 3 4 5 6 7 8 9 10 11 1628 ------------------- 1629 row 3 | o o o d d d o o o o o o 1630 row 4 | o o o d d d o o o o o o 1631 row 5 | o o o d d d o o o o o o 1632 ------------------- 1633 .ve 1634 1635 Thus, any entries in the d locations are stored in the d (diagonal) 1636 submatrix, and any entries in the o locations are stored in the 1637 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1638 stored simply in the MATSEQBAIJ format for compressed row storage. 1639 1640 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1641 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1642 In general, for PDE problems in which most nonzeros are near the diagonal, 1643 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1644 or you will get TERRIBLE performance; see the users' manual chapter on 1645 matrices. 1646 1647 Level: intermediate 1648 1649 .keywords: matrix, block, aij, compressed row, sparse, parallel 1650 1651 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1652 @*/ 1653 1654 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1655 { 1656 Mat B; 1657 Mat_MPISBAIJ *b; 1658 int ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size; 1659 PetscTruth flag1,flag2,flg; 1660 1661 PetscFunctionBegin; 1662 if (M != N || m != n){ /* N and n are not used after this */ 1663 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, set M=N and m=n"); 1664 } 1665 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1666 1667 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1668 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz); 1669 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz); 1670 if (d_nnz) { 1671 for (i=0; i<m/bs; i++) { 1672 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1673 } 1674 } 1675 if (o_nnz) { 1676 for (i=0; i<m/bs; i++) { 1677 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1678 } 1679 } 1680 1681 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1682 ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr); 1683 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1684 if (!flag1 && !flag2 && size == 1) { 1685 if (M == PETSC_DECIDE) M = m; 1686 ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 *A = 0; 1691 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView); 1692 PLogObjectCreate(B); 1693 B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b); 1694 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1695 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1696 1697 B->ops->destroy = MatDestroy_MPISBAIJ; 1698 B->ops->view = MatView_MPISBAIJ; 1699 B->mapping = 0; 1700 B->factor = 0; 1701 B->assembled = PETSC_FALSE; 1702 1703 B->insertmode = NOT_SET_VALUES; 1704 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1705 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 1706 1707 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 1708 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1709 } 1710 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 1711 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either M or m should be specified"); 1712 } 1713 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1714 1715 if (M == PETSC_DECIDE) { 1716 work[0] = m; mbs = m/bs; 1717 ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1718 M = sum[0]; Mbs = M/bs; 1719 } else { /* M is specified */ 1720 Mbs = M/bs; 1721 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global rows must be divisible by blocksize"); 1722 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1723 m = mbs*bs; 1724 } 1725 1726 if (mbs*bs != m) { 1727 SETERRQ(PETSC_ERR_ARG_SIZ,"No of local rows/cols must be divisible by blocksize"); 1728 } 1729 1730 b->m = m; B->m = m; 1731 b->n = m; B->n = m; 1732 b->N = M; B->N = M; 1733 b->M = M; 1734 B->M = M; 1735 b->bs = bs; 1736 b->bs2 = bs*bs; 1737 b->mbs = mbs; 1738 b->nbs = mbs; 1739 b->Mbs = Mbs; 1740 b->Nbs = Mbs; 1741 1742 /* the information in the maps duplicates the information computed below, eventually 1743 we should remove the duplicate information that is not contained in the maps */ 1744 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 1745 ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr); 1746 1747 /* build local table of row and column ownerships */ 1748 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1749 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1750 b->cowners = b->rowners + b->size + 2; 1751 b->rowners_bs = b->cowners + b->size + 2; 1752 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1753 b->rowners[0] = 0; 1754 for (i=2; i<=b->size; i++) { 1755 b->rowners[i] += b->rowners[i-1]; 1756 } 1757 for (i=0; i<=b->size; i++) { 1758 b->rowners_bs[i] = b->rowners[i]*bs; 1759 } 1760 b->rstart = b->rowners[b->rank]; 1761 b->rend = b->rowners[b->rank+1]; 1762 b->rstart_bs = b->rstart * bs; 1763 b->rend_bs = b->rend * bs; 1764 1765 b->cstart = b->rstart; 1766 b->cend = b->rend; 1767 b->cstart_bs = b->cstart * bs; 1768 b->cend_bs = b->cend * bs; 1769 1770 1771 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1772 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1773 PLogObjectParent(B,b->A); 1774 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1775 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1776 PLogObjectParent(B,b->B); 1777 1778 /* build cache for off array entries formed */ 1779 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1780 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1781 b->donotstash = PETSC_FALSE; 1782 b->colmap = PETSC_NULL; 1783 b->garray = PETSC_NULL; 1784 b->roworiented = PETSC_TRUE; 1785 1786 #if defined(PEYSC_USE_MAT_SINGLE) 1787 /* stuff for MatSetValues_XXX in single precision */ 1788 b->lensetvalues = 0; 1789 b->setvaluescopy = PETSC_NULL; 1790 #endif 1791 1792 /* stuff used in block assembly */ 1793 b->barray = 0; 1794 1795 /* stuff used for matrix vector multiply */ 1796 b->lvec = 0; 1797 b->Mvctx = 0; 1798 1799 /* stuff for MatGetRow() */ 1800 b->rowindices = 0; 1801 b->rowvalues = 0; 1802 b->getrowactive = PETSC_FALSE; 1803 1804 /* hash table stuff */ 1805 b->ht = 0; 1806 b->hd = 0; 1807 b->ht_size = 0; 1808 b->ht_flag = PETSC_FALSE; 1809 b->ht_fact = 0; 1810 b->ht_total_ct = 0; 1811 b->ht_insert_ct = 0; 1812 1813 *A = B; 1814 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1815 if (flg) { 1816 double fact = 1.39; 1817 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1818 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1819 if (fact <= 1.0) fact = 1.39; 1820 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1821 PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1822 } 1823 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1824 "MatStoreValues_MPISBAIJ", 1825 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1826 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1827 "MatRetrieveValues_MPISBAIJ", 1828 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1829 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1830 "MatGetDiagonalBlock_MPISBAIJ", 1831 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1832 PetscFunctionReturn(0); 1833 } 1834 1835 1836 #undef __FUNC__ 1837 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ" 1838 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1839 { 1840 Mat mat; 1841 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1842 int ierr,len=0; 1843 PetscTruth flg; 1844 1845 PetscFunctionBegin; 1846 *newmat = 0; 1847 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView); 1848 PLogObjectCreate(mat); 1849 mat->data = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a); 1850 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1851 mat->ops->destroy = MatDestroy_MPISBAIJ; 1852 mat->ops->view = MatView_MPISBAIJ; 1853 mat->factor = matin->factor; 1854 mat->assembled = PETSC_TRUE; 1855 mat->insertmode = NOT_SET_VALUES; 1856 1857 a->m = mat->m = oldmat->m; 1858 a->n = mat->n = oldmat->n; 1859 a->M = mat->M = oldmat->M; 1860 a->N = mat->N = oldmat->N; 1861 1862 a->bs = oldmat->bs; 1863 a->bs2 = oldmat->bs2; 1864 a->mbs = oldmat->mbs; 1865 a->nbs = oldmat->nbs; 1866 a->Mbs = oldmat->Mbs; 1867 a->Nbs = oldmat->Nbs; 1868 1869 a->rstart = oldmat->rstart; 1870 a->rend = oldmat->rend; 1871 a->cstart = oldmat->cstart; 1872 a->cend = oldmat->cend; 1873 a->size = oldmat->size; 1874 a->rank = oldmat->rank; 1875 a->donotstash = oldmat->donotstash; 1876 a->roworiented = oldmat->roworiented; 1877 a->rowindices = 0; 1878 a->rowvalues = 0; 1879 a->getrowactive = PETSC_FALSE; 1880 a->barray = 0; 1881 a->rstart_bs = oldmat->rstart_bs; 1882 a->rend_bs = oldmat->rend_bs; 1883 a->cstart_bs = oldmat->cstart_bs; 1884 a->cend_bs = oldmat->cend_bs; 1885 1886 /* hash table stuff */ 1887 a->ht = 0; 1888 a->hd = 0; 1889 a->ht_size = 0; 1890 a->ht_flag = oldmat->ht_flag; 1891 a->ht_fact = oldmat->ht_fact; 1892 a->ht_total_ct = 0; 1893 a->ht_insert_ct = 0; 1894 1895 1896 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1897 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1898 a->cowners = a->rowners + a->size + 2; 1899 a->rowners_bs = a->cowners + a->size + 2; 1900 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1901 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1902 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 1903 if (oldmat->colmap) { 1904 #if defined (PETSC_USE_CTABLE) 1905 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1906 #else 1907 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1908 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1909 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 1910 #endif 1911 } else a->colmap = 0; 1912 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1913 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 1914 PLogObjectMemory(mat,len*sizeof(int)); 1915 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 1916 } else a->garray = 0; 1917 1918 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1919 PLogObjectParent(mat,a->lvec); 1920 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1921 1922 PLogObjectParent(mat,a->Mvctx); 1923 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1924 PLogObjectParent(mat,a->A); 1925 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1926 PLogObjectParent(mat,a->B); 1927 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1928 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 1929 if (flg) { 1930 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1931 } 1932 *newmat = mat; 1933 PetscFunctionReturn(0); 1934 } 1935 1936 #include "petscsys.h" 1937 1938 #undef __FUNC__ 1939 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ" 1940 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat) 1941 { 1942 Mat A; 1943 int i,nz,ierr,j,rstart,rend,fd; 1944 Scalar *vals,*buf; 1945 MPI_Comm comm = ((PetscObject)viewer)->comm; 1946 MPI_Status status; 1947 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1948 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 1949 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 1950 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1951 int dcount,kmax,k,nzcount,tmp; 1952 1953 PetscFunctionBegin; 1954 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1955 1956 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1957 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1958 if (!rank) { 1959 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1960 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1961 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1962 if (header[3] < 0) { 1963 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 1964 } 1965 } 1966 1967 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1968 M = header[1]; N = header[2]; 1969 1970 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1971 1972 /* 1973 This code adds extra rows to make sure the number of rows is 1974 divisible by the blocksize 1975 */ 1976 Mbs = M/bs; 1977 extra_rows = bs - M + bs*(Mbs); 1978 if (extra_rows == bs) extra_rows = 0; 1979 else Mbs++; 1980 if (extra_rows &&!rank) { 1981 PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 1982 } 1983 1984 /* determine ownership of all rows */ 1985 mbs = Mbs/size + ((Mbs % size) > rank); 1986 m = mbs * bs; 1987 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 1988 browners = rowners + size + 1; 1989 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1990 rowners[0] = 0; 1991 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 1992 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 1993 rstart = rowners[rank]; 1994 rend = rowners[rank+1]; 1995 1996 /* distribute row lengths to all processors */ 1997 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 1998 if (!rank) { 1999 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2000 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2001 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2002 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2003 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2004 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2005 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2006 } else { 2007 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2008 } 2009 2010 if (!rank) { /* procs[0] */ 2011 /* calculate the number of nonzeros on each processor */ 2012 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2013 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2014 for (i=0; i<size; i++) { 2015 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2016 procsnz[i] += rowlengths[j]; 2017 } 2018 } 2019 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2020 2021 /* determine max buffer needed and allocate it */ 2022 maxnz = 0; 2023 for (i=0; i<size; i++) { 2024 maxnz = PetscMax(maxnz,procsnz[i]); 2025 } 2026 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2027 2028 /* read in my part of the matrix column indices */ 2029 nz = procsnz[0]; 2030 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2031 mycols = ibuf; 2032 if (size == 1) nz -= extra_rows; 2033 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2034 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2035 2036 /* read in every ones (except the last) and ship off */ 2037 for (i=1; i<size-1; i++) { 2038 nz = procsnz[i]; 2039 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2040 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2041 } 2042 /* read in the stuff for the last proc */ 2043 if (size != 1) { 2044 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2045 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2046 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2047 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2048 } 2049 ierr = PetscFree(cols);CHKERRQ(ierr); 2050 } else { /* procs[i], i>0 */ 2051 /* determine buffer space needed for message */ 2052 nz = 0; 2053 for (i=0; i<m; i++) { 2054 nz += locrowlens[i]; 2055 } 2056 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2057 mycols = ibuf; 2058 /* receive message of column indices*/ 2059 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2060 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2061 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2062 } 2063 2064 /* loop over local rows, determining number of off diagonal entries */ 2065 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2066 odlens = dlens + (rend-rstart); 2067 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2068 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2069 masked1 = mask + Mbs; 2070 masked2 = masked1 + Mbs; 2071 rowcount = 0; nzcount = 0; 2072 for (i=0; i<mbs; i++) { 2073 dcount = 0; 2074 odcount = 0; 2075 for (j=0; j<bs; j++) { 2076 kmax = locrowlens[rowcount]; 2077 for (k=0; k<kmax; k++) { 2078 tmp = mycols[nzcount++]/bs; /* block col. index */ 2079 if (!mask[tmp]) { 2080 mask[tmp] = 1; 2081 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2082 else masked1[dcount++] = tmp; /* entry in diag portion */ 2083 } 2084 } 2085 rowcount++; 2086 } 2087 2088 dlens[i] = dcount; /* d_nzz[i] */ 2089 odlens[i] = odcount; /* o_nzz[i] */ 2090 2091 /* zero out the mask elements we set */ 2092 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2093 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2094 } 2095 2096 /* create our matrix */ 2097 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2098 CHKERRQ(ierr); 2099 A = *newmat; 2100 MatSetOption(A,MAT_COLUMNS_SORTED); 2101 2102 if (!rank) { 2103 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2104 /* read in my part of the matrix numerical values */ 2105 nz = procsnz[0]; 2106 vals = buf; 2107 mycols = ibuf; 2108 if (size == 1) nz -= extra_rows; 2109 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2110 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2111 2112 /* insert into matrix */ 2113 jj = rstart*bs; 2114 for (i=0; i<m; i++) { 2115 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2116 mycols += locrowlens[i]; 2117 vals += locrowlens[i]; 2118 jj++; 2119 } 2120 2121 /* read in other processors (except the last one) and ship out */ 2122 for (i=1; i<size-1; i++) { 2123 nz = procsnz[i]; 2124 vals = buf; 2125 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2126 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2127 } 2128 /* the last proc */ 2129 if (size != 1){ 2130 nz = procsnz[i] - extra_rows; 2131 vals = buf; 2132 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2133 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2134 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2135 } 2136 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2137 2138 } else { 2139 /* receive numeric values */ 2140 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2141 2142 /* receive message of values*/ 2143 vals = buf; 2144 mycols = ibuf; 2145 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2146 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2147 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2148 2149 /* insert into matrix */ 2150 jj = rstart*bs; 2151 for (i=0; i<m; i++) { 2152 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2153 mycols += locrowlens[i]; 2154 vals += locrowlens[i]; 2155 jj++; 2156 } 2157 } 2158 2159 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2160 ierr = PetscFree(buf);CHKERRQ(ierr); 2161 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2162 ierr = PetscFree(rowners);CHKERRQ(ierr); 2163 ierr = PetscFree(dlens);CHKERRQ(ierr); 2164 ierr = PetscFree(mask);CHKERRQ(ierr); 2165 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2166 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 #undef __FUNC__ 2171 #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor" 2172 /*@ 2173 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2174 2175 Input Parameters: 2176 . mat - the matrix 2177 . fact - factor 2178 2179 Collective on Mat 2180 2181 Level: advanced 2182 2183 Notes: 2184 This can also be set by the command line option: -mat_use_hash_table fact 2185 2186 .keywords: matrix, hashtable, factor, HT 2187 2188 .seealso: MatSetOption() 2189 @*/ 2190 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2191 { 2192 PetscFunctionBegin; 2193 SETERRQ(1,"Function not yet written for SBAIJ format"); 2194 /* PetscFunctionReturn(0); */ 2195 } 2196 2197 #undef __FUNC__ 2198 #define __FUNC__ /*<a name=""></a>*/"MatGetRowMax_MPISBAIJ" 2199 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2200 { 2201 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2202 int ierr; 2203 2204 PetscFunctionBegin; 2205 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 2206 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2207 /* printf("GetRowMax is called\n"); */ 2208 PetscFunctionReturn(0); 2209 } 2210