1 /*$Id: mpisbaij.c,v 1.5 2000/07/17 18:41:49 hzhang Exp hzhang $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqSBAIJ(Mat); 18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*); 19 20 /* UGLY, ugly, ugly 21 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 22 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 23 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 24 converts the entries into single precision and then calls ..._MatScalar() to put them 25 into the single precision data structures. 26 */ 27 #if defined(PETSC_USE_MAT_SINGLE) 28 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 #else 34 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 35 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 36 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 37 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 38 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 39 #endif 40 41 EXTERN_C_BEGIN 42 #undef __FUNC__ 43 #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ" 44 int MatStoreValues_MPISBAIJ(Mat mat) 45 { 46 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 47 int ierr; 48 49 PetscFunctionBegin; 50 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 51 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 EXTERN_C_END 55 56 EXTERN_C_BEGIN 57 #undef __FUNC__ 58 #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ" 59 int MatRetrieveValues_MPISBAIJ(Mat mat) 60 { 61 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 62 int ierr; 63 64 PetscFunctionBegin; 65 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 66 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 71 /* 72 Local utility routine that creates a mapping from the global column 73 number to the local number in the off-diagonal part of the local 74 storage of the matrix. This is done in a non scable way since the 75 length of colmap equals the global matrix length. 76 */ 77 #undef __FUNC__ 78 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private" 79 static int CreateColmap_MPISBAIJ_Private(Mat mat) 80 { 81 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 82 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 83 int nbs = B->nbs,i,bs=B->bs,ierr; 84 85 PetscFunctionBegin; 86 #if defined (PETSC_USE_CTABLE) 87 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 88 for (i=0; i<nbs; i++){ 89 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 90 } 91 #else 92 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 93 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 94 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 95 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 96 #endif 97 PetscFunctionReturn(0); 98 } 99 100 #define CHUNKSIZE 10 101 102 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 103 { \ 104 \ 105 brow = row/bs; \ 106 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 107 rmax = aimax[brow]; nrow = ailen[brow]; \ 108 bcol = col/bs; \ 109 ridx = row % bs; cidx = col % bs; \ 110 low = 0; high = nrow; \ 111 while (high-low > 3) { \ 112 t = (low+high)/2; \ 113 if (rp[t] > bcol) high = t; \ 114 else low = t; \ 115 } \ 116 for (_i=low; _i<high; _i++) { \ 117 if (rp[_i] > bcol) break; \ 118 if (rp[_i] == bcol) { \ 119 bap = ap + bs2*_i + bs*cidx + ridx; \ 120 if (addv == ADD_VALUES) *bap += value; \ 121 else *bap = value; \ 122 goto a_noinsert; \ 123 } \ 124 } \ 125 if (a->nonew == 1) goto a_noinsert; \ 126 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 127 if (nrow >= rmax) { \ 128 /* there is no extra room in row, therefore enlarge */ \ 129 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 130 MatScalar *new_a; \ 131 \ 132 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 133 \ 134 /* malloc new storage space */ \ 135 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 136 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 137 new_j = (int*)(new_a + bs2*new_nz); \ 138 new_i = new_j + new_nz; \ 139 \ 140 /* copy over old data into new slots */ \ 141 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 142 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 143 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 144 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 145 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 146 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 148 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 149 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 150 /* free up old matrix storage */ \ 151 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 152 if (!a->singlemalloc) { \ 153 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 154 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 155 } \ 156 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 157 a->singlemalloc = PETSC_TRUE; \ 158 \ 159 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 160 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 161 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 162 a->s_maxnz += bs2*CHUNKSIZE; \ 163 a->reallocs++; \ 164 a->s_nz++; \ 165 } \ 166 N = nrow++ - 1; \ 167 /* shift up all the later entries in this row */ \ 168 for (ii=N; ii>=_i; ii--) { \ 169 rp[ii+1] = rp[ii]; \ 170 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 171 } \ 172 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 173 rp[_i] = bcol; \ 174 ap[bs2*_i + bs*cidx + ridx] = value; \ 175 a_noinsert:; \ 176 ailen[brow] = nrow; \ 177 } 178 #ifndef MatSetValues_SeqBAIJ_B_Private 179 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 180 { \ 181 brow = row/bs; \ 182 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 183 rmax = bimax[brow]; nrow = bilen[brow]; \ 184 bcol = col/bs; \ 185 ridx = row % bs; cidx = col % bs; \ 186 low = 0; high = nrow; \ 187 while (high-low > 3) { \ 188 t = (low+high)/2; \ 189 if (rp[t] > bcol) high = t; \ 190 else low = t; \ 191 } \ 192 for (_i=low; _i<high; _i++) { \ 193 if (rp[_i] > bcol) break; \ 194 if (rp[_i] == bcol) { \ 195 bap = ap + bs2*_i + bs*cidx + ridx; \ 196 if (addv == ADD_VALUES) *bap += value; \ 197 else *bap = value; \ 198 goto b_noinsert; \ 199 } \ 200 } \ 201 if (b->nonew == 1) goto b_noinsert; \ 202 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 203 if (nrow >= rmax) { \ 204 /* there is no extra room in row, therefore enlarge */ \ 205 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 206 MatScalar *new_a; \ 207 \ 208 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 209 \ 210 /* malloc new storage space */ \ 211 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 212 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 213 new_j = (int*)(new_a + bs2*new_nz); \ 214 new_i = new_j + new_nz; \ 215 \ 216 /* copy over old data into new slots */ \ 217 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 218 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 219 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 220 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 221 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 222 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 224 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 225 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 226 /* free up old matrix storage */ \ 227 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 228 if (!b->singlemalloc) { \ 229 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 230 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 231 } \ 232 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 233 b->singlemalloc = PETSC_TRUE; \ 234 \ 235 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 236 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 237 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 238 b->maxnz += bs2*CHUNKSIZE; \ 239 b->reallocs++; \ 240 b->nz++; \ 241 } \ 242 N = nrow++ - 1; \ 243 /* shift up all the later entries in this row */ \ 244 for (ii=N; ii>=_i; ii--) { \ 245 rp[ii+1] = rp[ii]; \ 246 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 247 } \ 248 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 249 rp[_i] = bcol; \ 250 ap[bs2*_i + bs*cidx + ridx] = value; \ 251 b_noinsert:; \ 252 bilen[brow] = nrow; \ 253 } 254 #endif 255 256 #if defined(PETSC_USE_MAT_SINGLE) 257 #undef __FUNC__ 258 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ" 259 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 260 { 261 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 262 int ierr,i,N = m*n; 263 MatScalar *vsingle; 264 265 PetscFunctionBegin; 266 if (N > b->setvalueslen) { 267 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 268 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 269 b->setvalueslen = N; 270 } 271 vsingle = b->setvaluescopy; 272 273 for (i=0; i<N; i++) { 274 vsingle[i] = v[i]; 275 } 276 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNC__ 281 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 282 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 283 { 284 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 285 int ierr,i,N = m*n*b->bs2; 286 MatScalar *vsingle; 287 288 PetscFunctionBegin; 289 if (N > b->setvalueslen) { 290 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 291 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 292 b->setvalueslen = N; 293 } 294 vsingle = b->setvaluescopy; 295 for (i=0; i<N; i++) { 296 vsingle[i] = v[i]; 297 } 298 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNC__ 303 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT" 304 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 305 { 306 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 307 int ierr,i,N = m*n; 308 MatScalar *vsingle; 309 310 PetscFunctionBegin; 311 if (N > b->setvalueslen) { 312 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 313 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 314 b->setvalueslen = N; 315 } 316 vsingle = b->setvaluescopy; 317 for (i=0; i<N; i++) { 318 vsingle[i] = v[i]; 319 } 320 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNC__ 325 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT" 326 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 327 { 328 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 329 int ierr,i,N = m*n*b->bs2; 330 MatScalar *vsingle; 331 332 PetscFunctionBegin; 333 if (N > b->setvalueslen) { 334 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 335 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 336 b->setvalueslen = N; 337 } 338 vsingle = b->setvaluescopy; 339 for (i=0; i<N; i++) { 340 vsingle[i] = v[i]; 341 } 342 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 #endif 346 347 /* Only add/insert a(i,j) with i<=j (blocks). 348 Any a(i,j) with i>j input by user is ingored. 349 */ 350 #undef __FUNC__ 351 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 352 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 353 { 354 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 355 MatScalar value; 356 int ierr,i,j,row,col; 357 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 358 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 359 int cend_orig=baij->cend_bs,bs=baij->bs; 360 361 /* Some Variables required in the macro */ 362 Mat A = baij->A; 363 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 364 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 365 MatScalar *aa=a->a; 366 367 Mat B = baij->B; 368 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 369 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 370 MatScalar *ba=b->a; 371 372 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 373 int low,high,t,ridx,cidx,bs2=a->bs2; 374 MatScalar *ap,*bap; 375 376 /* for stash */ 377 int n_loc, *in_loc=0; 378 MatScalar *v_loc=0; 379 380 PetscFunctionBegin; 381 382 if(!baij->donotstash){ 383 in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc); 384 v_loc = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc); 385 } 386 387 for (i=0; i<m; i++) { 388 if (im[i] < 0) continue; 389 #if defined(PETSC_USE_BOPT_g) 390 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 391 #endif 392 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 393 row = im[i] - rstart_orig; /* local row index */ 394 for (j=0; j<n; j++) { 395 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 396 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 397 col = in[j] - cstart_orig; /* local col index */ 398 brow = row/bs; bcol = col/bs; 399 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 400 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 401 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 402 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 403 } else if (in[j] < 0) continue; 404 #if defined(PETSC_USE_BOPT_g) 405 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 406 #endif 407 else { /* off-diag entry (B) */ 408 if (mat->was_assembled) { 409 if (!baij->colmap) { 410 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 411 } 412 #if defined (PETSC_USE_CTABLE) 413 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 414 col = col - 1 + in[j]%bs; 415 #else 416 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 417 #endif 418 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 419 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 420 col = in[j]; 421 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 422 B = baij->B; 423 b = (Mat_SeqBAIJ*)(B)->data; 424 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 425 ba=b->a; 426 } 427 } else col = in[j]; 428 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 429 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 430 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 431 } 432 } 433 } else { /* off processor entry */ 434 if (!baij->donotstash) { 435 n_loc = 0; 436 for (j=0; j<n; j++){ 437 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 438 in_loc[n_loc] = in[j]; 439 if (roworiented) { 440 v_loc[n_loc] = v[i*n+j]; 441 } else { 442 v_loc[n_loc] = v[j*m+i]; 443 } 444 n_loc++; 445 } 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 447 } 448 } 449 } 450 451 if(!baij->donotstash){ 452 ierr = PetscFree(in_loc);CHKERRQ(ierr); 453 ierr = PetscFree(v_loc);CHKERRQ(ierr); 454 } 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNC__ 459 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 460 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 461 { 462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 463 MatScalar *value,*barray=baij->barray; 464 int ierr,i,j,ii,jj,row,col; 465 int roworiented = baij->roworiented,rstart=baij->rstart ; 466 int rend=baij->rend,cstart=baij->cstart,stepval; 467 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 468 469 PetscFunctionBegin; 470 if(!barray) { 471 baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 472 } 473 474 if (roworiented) { 475 stepval = (n-1)*bs; 476 } else { 477 stepval = (m-1)*bs; 478 } 479 for (i=0; i<m; i++) { 480 if (im[i] < 0) continue; 481 #if defined(PETSC_USE_BOPT_g) 482 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 483 #endif 484 if (im[i] >= rstart && im[i] < rend) { 485 row = im[i] - rstart; 486 for (j=0; j<n; j++) { 487 /* If NumCol = 1 then a copy is not required */ 488 if ((roworiented) && (n == 1)) { 489 barray = v + i*bs2; 490 } else if((!roworiented) && (m == 1)) { 491 barray = v + j*bs2; 492 } else { /* Here a copy is required */ 493 if (roworiented) { 494 value = v + i*(stepval+bs)*bs + j*bs; 495 } else { 496 value = v + j*(stepval+bs)*bs + i*bs; 497 } 498 for (ii=0; ii<bs; ii++,value+=stepval) { 499 for (jj=0; jj<bs; jj++) { 500 *barray++ = *value++; 501 } 502 } 503 barray -=bs2; 504 } 505 506 if (in[j] >= cstart && in[j] < cend){ 507 col = in[j] - cstart; 508 ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509 } 510 else if (in[j] < 0) continue; 511 #if defined(PETSC_USE_BOPT_g) 512 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 513 #endif 514 else { 515 if (mat->was_assembled) { 516 if (!baij->colmap) { 517 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 518 } 519 520 #if defined(PETSC_USE_BOPT_g) 521 #if defined (PETSC_USE_CTABLE) 522 { int data; 523 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 524 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 525 } 526 #else 527 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 528 #endif 529 #endif 530 #if defined (PETSC_USE_CTABLE) 531 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 532 col = (col - 1)/bs; 533 #else 534 col = (baij->colmap[in[j]] - 1)/bs; 535 #endif 536 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 537 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 538 col = in[j]; 539 } 540 } 541 else col = in[j]; 542 ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 543 } 544 } 545 } else { 546 if (!baij->donotstash) { 547 if (roworiented) { 548 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 PetscFunctionReturn(0); 556 } 557 558 #define HASH_KEY 0.6180339887 559 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 560 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 562 #undef __FUNC__ 563 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar" 564 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 565 { 566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567 int ierr,i,j,row,col; 568 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 569 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 570 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 571 PetscReal tmp; 572 MatScalar **HD = baij->hd,value; 573 #if defined(PETSC_USE_BOPT_g) 574 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 575 #endif 576 577 PetscFunctionBegin; 578 579 for (i=0; i<m; i++) { 580 #if defined(PETSC_USE_BOPT_g) 581 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 582 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 583 #endif 584 row = im[i]; 585 if (row >= rstart_orig && row < rend_orig) { 586 for (j=0; j<n; j++) { 587 col = in[j]; 588 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 589 /* Look up into the Hash Table */ 590 key = (row/bs)*Nbs+(col/bs)+1; 591 h1 = HASH(size,key,tmp); 592 593 594 idx = h1; 595 #if defined(PETSC_USE_BOPT_g) 596 insert_ct++; 597 total_ct++; 598 if (HT[idx] != key) { 599 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 600 if (idx == size) { 601 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 602 if (idx == h1) { 603 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 604 } 605 } 606 } 607 #else 608 if (HT[idx] != key) { 609 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 610 if (idx == size) { 611 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 612 if (idx == h1) { 613 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 614 } 615 } 616 } 617 #endif 618 /* A HASH table entry is found, so insert the values at the correct address */ 619 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 620 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 621 } 622 } else { 623 if (!baij->donotstash) { 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 #if defined(PETSC_USE_BOPT_g) 633 baij->ht_total_ct = total_ct; 634 baij->ht_insert_ct = insert_ct; 635 #endif 636 PetscFunctionReturn(0); 637 } 638 639 #undef __FUNC__ 640 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 641 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 642 { 643 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 644 int ierr,i,j,ii,jj,row,col; 645 int roworiented = baij->roworiented,rstart=baij->rstart ; 646 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 647 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 648 PetscReal tmp; 649 MatScalar **HD = baij->hd,*baij_a; 650 MatScalar *v_t,*value; 651 #if defined(PETSC_USE_BOPT_g) 652 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 653 #endif 654 655 PetscFunctionBegin; 656 657 if (roworiented) { 658 stepval = (n-1)*bs; 659 } else { 660 stepval = (m-1)*bs; 661 } 662 for (i=0; i<m; i++) { 663 #if defined(PETSC_USE_BOPT_g) 664 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 665 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 666 #endif 667 row = im[i]; 668 v_t = v + i*bs2; 669 if (row >= rstart && row < rend) { 670 for (j=0; j<n; j++) { 671 col = in[j]; 672 673 /* Look up into the Hash Table */ 674 key = row*Nbs+col+1; 675 h1 = HASH(size,key,tmp); 676 677 idx = h1; 678 #if defined(PETSC_USE_BOPT_g) 679 total_ct++; 680 insert_ct++; 681 if (HT[idx] != key) { 682 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 683 if (idx == size) { 684 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 685 if (idx == h1) { 686 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 687 } 688 } 689 } 690 #else 691 if (HT[idx] != key) { 692 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 693 if (idx == size) { 694 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 695 if (idx == h1) { 696 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 697 } 698 } 699 } 700 #endif 701 baij_a = HD[idx]; 702 if (roworiented) { 703 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 704 /* value = v + (i*(stepval+bs)+j)*bs; */ 705 value = v_t; 706 v_t += bs; 707 if (addv == ADD_VALUES) { 708 for (ii=0; ii<bs; ii++,value+=stepval) { 709 for (jj=ii; jj<bs2; jj+=bs) { 710 baij_a[jj] += *value++; 711 } 712 } 713 } else { 714 for (ii=0; ii<bs; ii++,value+=stepval) { 715 for (jj=ii; jj<bs2; jj+=bs) { 716 baij_a[jj] = *value++; 717 } 718 } 719 } 720 } else { 721 value = v + j*(stepval+bs)*bs + i*bs; 722 if (addv == ADD_VALUES) { 723 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 724 for (jj=0; jj<bs; jj++) { 725 baij_a[jj] += *value++; 726 } 727 } 728 } else { 729 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 730 for (jj=0; jj<bs; jj++) { 731 baij_a[jj] = *value++; 732 } 733 } 734 } 735 } 736 } 737 } else { 738 if (!baij->donotstash) { 739 if (roworiented) { 740 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 741 } else { 742 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 743 } 744 } 745 } 746 } 747 #if defined(PETSC_USE_BOPT_g) 748 baij->ht_total_ct = total_ct; 749 baij->ht_insert_ct = insert_ct; 750 #endif 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNC__ 755 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ" 756 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 757 { 758 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 759 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 760 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 765 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 766 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 767 row = idxm[i] - bsrstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 770 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 771 if (idxn[j] >= bscstart && idxn[j] < bscend){ 772 col = idxn[j] - bscstart; 773 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!baij->colmap) { 776 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined (PETSC_USE_CTABLE) 779 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 780 data --; 781 #else 782 data = baij->colmap[idxn[j]/bs]-1; 783 #endif 784 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 785 else { 786 col = data + idxn[j]%bs; 787 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 788 } 789 } 790 } 791 } else { 792 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 793 } 794 } 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ" 800 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 801 { 802 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 803 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 804 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 805 int ierr; 806 PetscReal sum[2],*lnorm2; 807 808 PetscFunctionBegin; 809 if (baij->size == 1) { 810 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 811 } else { 812 if (type == NORM_FROBENIUS) { 813 lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2); 814 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 815 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 816 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 817 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 818 /* 819 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 820 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 821 */ 822 ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 823 /* 824 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 825 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 826 827 *norm = sqrt(sum[0] + 2*sum[1]); 828 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 829 } else { 830 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 831 } 832 } 833 PetscFunctionReturn(0); 834 } 835 836 /* 837 Creates the hash table, and sets the table 838 This table is created only once. 839 If new entried need to be added to the matrix 840 then the hash table has to be destroyed and 841 recreated. 842 */ 843 #undef __FUNC__ 844 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private" 845 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 846 { 847 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 848 Mat A = baij->A,B=baij->B; 849 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 850 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 851 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 852 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 853 int *HT,key; 854 MatScalar **HD; 855 PetscReal tmp; 856 #if defined(PETSC_USE_BOPT_g) 857 int ct=0,max=0; 858 #endif 859 860 PetscFunctionBegin; 861 baij->ht_size=(int)(factor*nz); 862 size = baij->ht_size; 863 864 if (baij->ht) { 865 PetscFunctionReturn(0); 866 } 867 868 /* Allocate Memory for Hash Table */ 869 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 870 baij->ht = (int*)(baij->hd + size); 871 HD = baij->hd; 872 HT = baij->ht; 873 874 875 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 876 877 878 /* Loop Over A */ 879 for (i=0; i<a->mbs; i++) { 880 for (j=ai[i]; j<ai[i+1]; j++) { 881 row = i+rstart; 882 col = aj[j]+cstart; 883 884 key = row*Nbs + col + 1; 885 h1 = HASH(size,key,tmp); 886 for (k=0; k<size; k++){ 887 if (HT[(h1+k)%size] == 0.0) { 888 HT[(h1+k)%size] = key; 889 HD[(h1+k)%size] = a->a + j*bs2; 890 break; 891 #if defined(PETSC_USE_BOPT_g) 892 } else { 893 ct++; 894 #endif 895 } 896 } 897 #if defined(PETSC_USE_BOPT_g) 898 if (k> max) max = k; 899 #endif 900 } 901 } 902 /* Loop Over B */ 903 for (i=0; i<b->mbs; i++) { 904 for (j=bi[i]; j<bi[i+1]; j++) { 905 row = i+rstart; 906 col = garray[bj[j]]; 907 key = row*Nbs + col + 1; 908 h1 = HASH(size,key,tmp); 909 for (k=0; k<size; k++){ 910 if (HT[(h1+k)%size] == 0.0) { 911 HT[(h1+k)%size] = key; 912 HD[(h1+k)%size] = b->a + j*bs2; 913 break; 914 #if defined(PETSC_USE_BOPT_g) 915 } else { 916 ct++; 917 #endif 918 } 919 } 920 #if defined(PETSC_USE_BOPT_g) 921 if (k> max) max = k; 922 #endif 923 } 924 } 925 926 /* Print Summary */ 927 #if defined(PETSC_USE_BOPT_g) 928 for (i=0,j=0; i<size; i++) { 929 if (HT[i]) {j++;} 930 } 931 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 932 #endif 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNC__ 937 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ" 938 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 939 { 940 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 941 int ierr,nstash,reallocs; 942 InsertMode addv; 943 944 PetscFunctionBegin; 945 if (baij->donotstash) { 946 PetscFunctionReturn(0); 947 } 948 949 /* make sure all processors are either in INSERTMODE or ADDMODE */ 950 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 951 if (addv == (ADD_VALUES|INSERT_VALUES)) { 952 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 953 } 954 mat->insertmode = addv; /* in case this processor had no cache */ 955 956 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 957 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 958 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 959 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 960 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 961 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNC__ 966 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ" 967 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 968 { 969 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 970 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 971 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 972 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 973 int *row,*col,other_disassembled; 974 PetscTruth r1,r2,r3; 975 MatScalar *val; 976 InsertMode addv = mat->insertmode; 977 int rank; 978 979 PetscFunctionBegin; 980 /* remove 2 line below later */ 981 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 982 983 if (!baij->donotstash) { 984 while (1) { 985 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 986 /* 987 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 988 PetscSynchronizedFlush(PETSC_COMM_WORLD); 989 */ 990 if (!flg) break; 991 992 for (i=0; i<n;) { 993 /* Now identify the consecutive vals belonging to the same row */ 994 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 995 if (j < n) ncols = j-i; 996 else ncols = n-i; 997 /* Now assemble all these values with a single function call */ 998 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 999 i = j; 1000 } 1001 } 1002 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1003 /* Now process the block-stash. Since the values are stashed column-oriented, 1004 set the roworiented flag to column oriented, and after MatSetValues() 1005 restore the original flags */ 1006 r1 = baij->roworiented; 1007 r2 = a->roworiented; 1008 r3 = b->roworiented; 1009 baij->roworiented = PETSC_FALSE; 1010 a->roworiented = PETSC_FALSE; 1011 b->roworiented = PETSC_FALSE; 1012 while (1) { 1013 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1014 if (!flg) break; 1015 1016 for (i=0; i<n;) { 1017 /* Now identify the consecutive vals belonging to the same row */ 1018 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1019 if (j < n) ncols = j-i; 1020 else ncols = n-i; 1021 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1022 i = j; 1023 } 1024 } 1025 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1026 baij->roworiented = r1; 1027 a->roworiented = r2; 1028 b->roworiented = r3; 1029 } 1030 1031 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1032 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1033 1034 /* determine if any processor has disassembled, if so we must 1035 also disassemble ourselfs, in order that we may reassemble. */ 1036 /* 1037 if nonzero structure of submatrix B cannot change then we know that 1038 no processor disassembled thus we can skip this stuff 1039 */ 1040 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1041 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1042 if (mat->was_assembled && !other_disassembled) { 1043 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 1044 } 1045 } 1046 1047 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1048 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); 1049 } 1050 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1051 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1052 1053 #if defined(PETSC_USE_BOPT_g) 1054 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1055 PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 1056 baij->ht_total_ct = 0; 1057 baij->ht_insert_ct = 0; 1058 } 1059 #endif 1060 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1061 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1062 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 1063 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 1064 } 1065 1066 if (baij->rowvalues) { 1067 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1068 baij->rowvalues = 0; 1069 } 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNC__ 1074 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket" 1075 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 1076 { 1077 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1078 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 1079 PetscTruth isascii,isdraw; 1080 Viewer sviewer; 1081 1082 PetscFunctionBegin; 1083 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1084 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1085 if (isascii) { 1086 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1087 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1088 MatInfo info; 1089 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1090 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1091 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1092 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1093 baij->bs,(int)info.memory);CHKERRQ(ierr); 1094 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1095 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1096 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1097 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1098 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 1099 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1102 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 } 1106 1107 if (isdraw) { 1108 Draw draw; 1109 PetscTruth isnull; 1110 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1111 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1112 } 1113 1114 if (size == 1) { 1115 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1116 } else { 1117 /* assemble the entire matrix onto first processor. */ 1118 Mat A; 1119 Mat_SeqSBAIJ *Aloc; 1120 Mat_SeqBAIJ *Bloc; 1121 int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1122 MatScalar *a; 1123 1124 if (!rank) { 1125 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1126 } else { 1127 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1128 } 1129 PLogObjectParent(mat,A); 1130 1131 /* copy over the A part */ 1132 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 1133 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1134 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1135 1136 for (i=0; i<mbs; i++) { 1137 rvals[0] = bs*(baij->rstart + i); 1138 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1139 for (j=ai[i]; j<ai[i+1]; j++) { 1140 col = (baij->cstart+aj[j])*bs; 1141 for (k=0; k<bs; k++) { 1142 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1143 col++; a += bs; 1144 } 1145 } 1146 } 1147 /* copy over the B part */ 1148 Bloc = (Mat_SeqBAIJ*)baij->B->data; 1149 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 1150 for (i=0; i<mbs; i++) { 1151 rvals[0] = bs*(baij->rstart + i); 1152 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1153 for (j=ai[i]; j<ai[i+1]; j++) { 1154 col = baij->garray[aj[j]]*bs; 1155 for (k=0; k<bs; k++) { 1156 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1157 col++; a += bs; 1158 } 1159 } 1160 } 1161 ierr = PetscFree(rvals);CHKERRQ(ierr); 1162 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1163 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1164 /* 1165 Everyone has to call to draw the matrix since the graphics waits are 1166 synchronized across all processors that share the Draw object 1167 */ 1168 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169 if (!rank) { 1170 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1171 } 1172 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1173 ierr = MatDestroy(A);CHKERRQ(ierr); 1174 } 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNC__ 1179 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ" 1180 int MatView_MPISBAIJ(Mat mat,Viewer viewer) 1181 { 1182 int ierr; 1183 PetscTruth isascii,isdraw,issocket,isbinary; 1184 1185 PetscFunctionBegin; 1186 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1187 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1188 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1189 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1190 if (isascii || isdraw || issocket || isbinary) { 1191 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1192 } else { 1193 SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNC__ 1199 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ" 1200 int MatDestroy_MPISBAIJ(Mat mat) 1201 { 1202 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1203 int ierr; 1204 1205 PetscFunctionBegin; 1206 1207 if (mat->mapping) { 1208 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1209 } 1210 if (mat->bmapping) { 1211 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1212 } 1213 if (mat->rmap) { 1214 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1215 } 1216 if (mat->cmap) { 1217 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1218 } 1219 #if defined(PETSC_USE_LOG) 1220 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 1221 #endif 1222 1223 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1224 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1225 1226 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1227 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1228 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1229 #if defined (PETSC_USE_CTABLE) 1230 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1231 #else 1232 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1233 #endif 1234 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1235 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1236 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1237 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1238 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1239 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1240 #if defined(PETSC_USE_MAT_SINGLE) 1241 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1242 #endif 1243 ierr = PetscFree(baij);CHKERRQ(ierr); 1244 PLogObjectDestroy(mat); 1245 PetscHeaderDestroy(mat); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNC__ 1250 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ" 1251 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1252 { 1253 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1254 int ierr,nt; 1255 1256 PetscFunctionBegin; 1257 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1258 if (nt != a->n) { 1259 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1260 } 1261 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1262 if (nt != a->m) { 1263 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1264 } 1265 1266 /* do diagonal part */ 1267 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1268 1269 /* do supperdiagonal part */ 1270 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1271 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1272 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1273 1274 /* do subdiagonal part */ 1275 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1276 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1277 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1278 #ifdef old 1279 /* do subdiagonal part */ 1280 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1281 /* send it on its way */ 1282 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1283 /* do local part */ 1284 ierr = (*a->B->ops->mult)(a->B,xx,yy);CHKERRQ(ierr); 1285 ierr = (*a->A->ops->multadd)(a->A,xx,yy,yy);CHKERRQ(ierr); 1286 /* receive remote parts */ 1287 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1288 #endif 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNC__ 1293 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ" 1294 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1295 { 1296 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1297 int ierr; 1298 1299 PetscFunctionBegin; 1300 /* do subdiagonal part */ 1301 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1302 /* send it on its way */ 1303 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1304 /* do local part */ 1305 ierr = (*a->B->ops->multadd)(a->B,xx,yy,zz);CHKERRQ(ierr); 1306 ierr = (*a->A->ops->multadd)(a->A,xx,zz,zz);CHKERRQ(ierr); 1307 /* receive remote parts */ 1308 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1309 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNC__ 1314 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ" 1315 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1316 { 1317 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1318 int ierr; 1319 1320 PetscFunctionBegin; 1321 /* do nondiagonal part */ 1322 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1323 /* send it on its way */ 1324 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1325 /* do local part */ 1326 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1327 /* receive remote parts: note this assumes the values are not actually */ 1328 /* inserted in yy until the next line, which is true for my implementation*/ 1329 /* but is not perhaps always true. */ 1330 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 #undef __FUNC__ 1335 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ" 1336 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1337 { 1338 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1339 int ierr; 1340 1341 PetscFunctionBegin; 1342 /* do nondiagonal part */ 1343 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1344 /* send it on its way */ 1345 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1346 /* do local part */ 1347 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1348 /* receive remote parts: note this assumes the values are not actually */ 1349 /* inserted in yy until the next line, which is true for my implementation*/ 1350 /* but is not perhaps always true. */ 1351 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 1355 /* 1356 This only works correctly for square matrices where the subblock A->A is the 1357 diagonal block 1358 */ 1359 #undef __FUNC__ 1360 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ" 1361 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1362 { 1363 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1364 int ierr; 1365 1366 PetscFunctionBegin; 1367 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */ 1368 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNC__ 1373 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ" 1374 int MatScale_MPISBAIJ(Scalar *aa,Mat A) 1375 { 1376 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1377 int ierr; 1378 1379 PetscFunctionBegin; 1380 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1381 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNC__ 1386 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ" 1387 int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n) 1388 { 1389 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1390 1391 PetscFunctionBegin; 1392 if (m) *m = mat->M; 1393 if (n) *n = mat->N; 1394 PetscFunctionReturn(0); 1395 } 1396 1397 #undef __FUNC__ 1398 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ" 1399 int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n) 1400 { 1401 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1402 1403 PetscFunctionBegin; 1404 *m = mat->m; *n = mat->n; 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNC__ 1409 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ" 1410 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n) 1411 { 1412 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1413 1414 PetscFunctionBegin; 1415 if (m) *m = mat->rstart*mat->bs; 1416 if (n) *n = mat->rend*mat->bs; 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNC__ 1421 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ" 1422 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1423 { 1424 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1425 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1426 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1427 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1428 int *cmap,*idx_p,cstart = mat->cstart; 1429 1430 PetscFunctionBegin; 1431 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1432 mat->getrowactive = PETSC_TRUE; 1433 1434 if (!mat->rowvalues && (idx || v)) { 1435 /* 1436 allocate enough space to hold information from the longest row. 1437 */ 1438 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1439 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1440 int max = 1,mbs = mat->mbs,tmp; 1441 for (i=0; i<mbs; i++) { 1442 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1443 if (max < tmp) { max = tmp; } 1444 } 1445 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1446 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1447 } 1448 1449 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1450 lrow = row - brstart; /* local row index */ 1451 1452 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1453 if (!v) {pvA = 0; pvB = 0;} 1454 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1455 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1456 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1457 nztot = nzA + nzB; 1458 1459 cmap = mat->garray; 1460 if (v || idx) { 1461 if (nztot) { 1462 /* Sort by increasing column numbers, assuming A and B already sorted */ 1463 int imark = -1; 1464 if (v) { 1465 *v = v_p = mat->rowvalues; 1466 for (i=0; i<nzB; i++) { 1467 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1468 else break; 1469 } 1470 imark = i; 1471 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1472 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1473 } 1474 if (idx) { 1475 *idx = idx_p = mat->rowindices; 1476 if (imark > -1) { 1477 for (i=0; i<imark; i++) { 1478 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1479 } 1480 } else { 1481 for (i=0; i<nzB; i++) { 1482 if (cmap[cworkB[i]/bs] < cstart) 1483 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1484 else break; 1485 } 1486 imark = i; 1487 } 1488 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1489 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1490 } 1491 } else { 1492 if (idx) *idx = 0; 1493 if (v) *v = 0; 1494 } 1495 } 1496 *nz = nztot; 1497 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1498 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNC__ 1503 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ" 1504 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1505 { 1506 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1507 1508 PetscFunctionBegin; 1509 if (baij->getrowactive == PETSC_FALSE) { 1510 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1511 } 1512 baij->getrowactive = PETSC_FALSE; 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNC__ 1517 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ" 1518 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1519 { 1520 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1521 1522 PetscFunctionBegin; 1523 *bs = baij->bs; 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNC__ 1528 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ" 1529 int MatZeroEntries_MPISBAIJ(Mat A) 1530 { 1531 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1532 int ierr; 1533 1534 PetscFunctionBegin; 1535 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1536 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNC__ 1541 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ" 1542 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1543 { 1544 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1545 Mat A = a->A,B = a->B; 1546 int ierr; 1547 PetscReal isend[5],irecv[5]; 1548 1549 PetscFunctionBegin; 1550 info->block_size = (double)a->bs; 1551 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1552 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1553 isend[3] = info->memory; isend[4] = info->mallocs; 1554 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1555 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1556 isend[3] += info->memory; isend[4] += info->mallocs; 1557 if (flag == MAT_LOCAL) { 1558 info->nz_used = isend[0]; 1559 info->nz_allocated = isend[1]; 1560 info->nz_unneeded = isend[2]; 1561 info->memory = isend[3]; 1562 info->mallocs = isend[4]; 1563 } else if (flag == MAT_GLOBAL_MAX) { 1564 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1565 info->nz_used = irecv[0]; 1566 info->nz_allocated = irecv[1]; 1567 info->nz_unneeded = irecv[2]; 1568 info->memory = irecv[3]; 1569 info->mallocs = irecv[4]; 1570 } else if (flag == MAT_GLOBAL_SUM) { 1571 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1572 info->nz_used = irecv[0]; 1573 info->nz_allocated = irecv[1]; 1574 info->nz_unneeded = irecv[2]; 1575 info->memory = irecv[3]; 1576 info->mallocs = irecv[4]; 1577 } else { 1578 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1579 } 1580 info->rows_global = (double)a->M; 1581 info->columns_global = (double)a->N; 1582 info->rows_local = (double)a->m; 1583 info->columns_local = (double)a->N; 1584 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1585 info->fill_ratio_needed = 0; 1586 info->factor_mallocs = 0; 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNC__ 1591 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ" 1592 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1593 { 1594 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1595 int ierr; 1596 1597 PetscFunctionBegin; 1598 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1599 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1600 op == MAT_COLUMNS_UNSORTED || 1601 op == MAT_COLUMNS_SORTED || 1602 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1603 op == MAT_KEEP_ZEROED_ROWS || 1604 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1605 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1606 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1607 } else if (op == MAT_ROW_ORIENTED) { 1608 a->roworiented = PETSC_TRUE; 1609 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1610 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1611 } else if (op == MAT_ROWS_SORTED || 1612 op == MAT_ROWS_UNSORTED || 1613 op == MAT_SYMMETRIC || 1614 op == MAT_STRUCTURALLY_SYMMETRIC || 1615 op == MAT_YES_NEW_DIAGONALS || 1616 op == MAT_USE_HASH_TABLE) { 1617 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1618 } else if (op == MAT_COLUMN_ORIENTED) { 1619 a->roworiented = PETSC_FALSE; 1620 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1621 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1622 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1623 a->donotstash = PETSC_TRUE; 1624 } else if (op == MAT_NO_NEW_DIAGONALS) { 1625 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1626 } else if (op == MAT_USE_HASH_TABLE) { 1627 a->ht_flag = PETSC_TRUE; 1628 } else { 1629 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1630 } 1631 PetscFunctionReturn(0); 1632 } 1633 1634 #undef __FUNC__ 1635 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ(" 1636 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1637 { 1638 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1639 Mat_SeqBAIJ *Aloc; 1640 Mat B; 1641 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1642 int bs=baij->bs,mbs=baij->mbs; 1643 MatScalar *a; 1644 1645 PetscFunctionBegin; 1646 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1647 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1648 1649 /* copy over the A part */ 1650 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1651 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1652 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1653 1654 for (i=0; i<mbs; i++) { 1655 rvals[0] = bs*(baij->rstart + i); 1656 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1657 for (j=ai[i]; j<ai[i+1]; j++) { 1658 col = (baij->cstart+aj[j])*bs; 1659 for (k=0; k<bs; k++) { 1660 ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1661 col++; a += bs; 1662 } 1663 } 1664 } 1665 /* copy over the B part */ 1666 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1667 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1668 for (i=0; i<mbs; i++) { 1669 rvals[0] = bs*(baij->rstart + i); 1670 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1671 for (j=ai[i]; j<ai[i+1]; j++) { 1672 col = baij->garray[aj[j]]*bs; 1673 for (k=0; k<bs; k++) { 1674 ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1675 col++; a += bs; 1676 } 1677 } 1678 } 1679 ierr = PetscFree(rvals);CHKERRQ(ierr); 1680 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 1683 if (matout) { 1684 *matout = B; 1685 } else { 1686 PetscOps *Abops; 1687 MatOps Aops; 1688 1689 /* This isn't really an in-place transpose .... but free data structures from baij */ 1690 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1691 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1692 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1693 #if defined (PETSC_USE_CTABLE) 1694 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1695 #else 1696 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1697 #endif 1698 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1699 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1700 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1701 ierr = PetscFree(baij);CHKERRQ(ierr); 1702 1703 /* 1704 This is horrible, horrible code. We need to keep the 1705 A pointers for the bops and ops but copy everything 1706 else from C. 1707 */ 1708 Abops = A->bops; 1709 Aops = A->ops; 1710 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1711 A->bops = Abops; 1712 A->ops = Aops; 1713 1714 PetscHeaderDestroy(B); 1715 } 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNC__ 1720 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ" 1721 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1722 { 1723 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1724 Mat a = baij->A,b = baij->B; 1725 int ierr,s1,s2,s3; 1726 1727 PetscFunctionBegin; 1728 if (ll != rr) { 1729 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1730 } 1731 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1732 if (rr) { 1733 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1734 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1735 /* Overlap communication with computation. */ 1736 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1737 /*} if (ll) { */ 1738 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1739 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1740 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1741 /* } */ 1742 /* scale the diagonal block */ 1743 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1744 1745 /* if (rr) { */ 1746 /* Do a scatter end and then right scale the off-diagonal block */ 1747 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1748 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1749 } 1750 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNC__ 1755 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ" 1756 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag) 1757 { 1758 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1759 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1760 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1761 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1762 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1763 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1764 MPI_Comm comm = A->comm; 1765 MPI_Request *send_waits,*recv_waits; 1766 MPI_Status recv_status,*send_status; 1767 IS istmp; 1768 1769 PetscFunctionBegin; 1770 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1771 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1772 1773 /* first count number of contributors to each processor */ 1774 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1775 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1776 procs = nprocs + size; 1777 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1778 for (i=0; i<N; i++) { 1779 idx = rows[i]; 1780 found = 0; 1781 for (j=0; j<size; j++) { 1782 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1783 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1784 } 1785 } 1786 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1787 } 1788 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1789 1790 /* inform other processors of number of messages and max length*/ 1791 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1792 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1793 nmax = work[rank]; 1794 nrecvs = work[size+rank]; 1795 ierr = PetscFree(work);CHKERRQ(ierr); 1796 1797 /* post receives: */ 1798 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1799 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1800 for (i=0; i<nrecvs; i++) { 1801 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1802 } 1803 1804 /* do sends: 1805 1) starts[i] gives the starting index in svalues for stuff going to 1806 the ith processor 1807 */ 1808 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1809 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1810 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1811 starts[0] = 0; 1812 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1813 for (i=0; i<N; i++) { 1814 svalues[starts[owner[i]]++] = rows[i]; 1815 } 1816 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1817 1818 starts[0] = 0; 1819 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1820 count = 0; 1821 for (i=0; i<size; i++) { 1822 if (procs[i]) { 1823 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1824 } 1825 } 1826 ierr = PetscFree(starts);CHKERRQ(ierr); 1827 1828 base = owners[rank]*bs; 1829 1830 /* wait on receives */ 1831 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1832 source = lens + nrecvs; 1833 count = nrecvs; slen = 0; 1834 while (count) { 1835 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1836 /* unpack receives into our local space */ 1837 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1838 source[imdex] = recv_status.MPI_SOURCE; 1839 lens[imdex] = n; 1840 slen += n; 1841 count--; 1842 } 1843 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1844 1845 /* move the data into the send scatter */ 1846 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1847 count = 0; 1848 for (i=0; i<nrecvs; i++) { 1849 values = rvalues + i*nmax; 1850 for (j=0; j<lens[i]; j++) { 1851 lrows[count++] = values[j] - base; 1852 } 1853 } 1854 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1855 ierr = PetscFree(lens);CHKERRQ(ierr); 1856 ierr = PetscFree(owner);CHKERRQ(ierr); 1857 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1858 1859 /* actually zap the local rows */ 1860 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1861 PLogObjectParent(A,istmp); 1862 1863 /* 1864 Zero the required rows. If the "diagonal block" of the matrix 1865 is square and the user wishes to set the diagonal we use seperate 1866 code so that MatSetValues() is not called for each diagonal allocating 1867 new memory, thus calling lots of mallocs and slowing things down. 1868 1869 Contributed by: Mathew Knepley 1870 */ 1871 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1872 ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1873 if (diag && (l->A->M == l->A->N)) { 1874 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1875 } else if (diag) { 1876 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1877 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1878 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1879 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1880 } 1881 for (i=0; i<slen; i++) { 1882 row = lrows[i] + rstart_bs; 1883 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1884 } 1885 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1886 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1887 } else { 1888 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1889 } 1890 1891 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1892 ierr = PetscFree(lrows);CHKERRQ(ierr); 1893 1894 /* wait on sends */ 1895 if (nsends) { 1896 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1897 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1898 ierr = PetscFree(send_status);CHKERRQ(ierr); 1899 } 1900 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1901 ierr = PetscFree(svalues);CHKERRQ(ierr); 1902 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #undef __FUNC__ 1907 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ" 1908 int MatPrintHelp_MPISBAIJ(Mat A) 1909 { 1910 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1911 MPI_Comm comm = A->comm; 1912 static int called = 0; 1913 int ierr; 1914 1915 PetscFunctionBegin; 1916 if (!a->rank) { 1917 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1918 } 1919 if (called) {PetscFunctionReturn(0);} else called = 1; 1920 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1921 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #undef __FUNC__ 1926 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ" 1927 int MatSetUnfactored_MPISBAIJ(Mat A) 1928 { 1929 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1930 int ierr; 1931 1932 PetscFunctionBegin; 1933 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1938 1939 #undef __FUNC__ 1940 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ" 1941 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1942 { 1943 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1944 Mat a,b,c,d; 1945 PetscTruth flg; 1946 int ierr; 1947 1948 PetscFunctionBegin; 1949 if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1950 a = matA->A; b = matA->B; 1951 c = matB->A; d = matB->B; 1952 1953 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1954 if (flg == PETSC_TRUE) { 1955 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1956 } 1957 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 /* -------------------------------------------------------------------*/ 1962 static struct _MatOps MatOps_Values = { 1963 MatSetValues_MPISBAIJ, 1964 MatGetRow_MPISBAIJ, 1965 MatRestoreRow_MPISBAIJ, 1966 MatMult_MPISBAIJ, 1967 MatMultAdd_MPISBAIJ, 1968 MatMultTranspose_MPISBAIJ, 1969 MatMultTransposeAdd_MPISBAIJ, 1970 0, 1971 0, 1972 0, 1973 0, 1974 0, 1975 0, 1976 0, 1977 MatTranspose_MPISBAIJ, 1978 MatGetInfo_MPISBAIJ, 1979 MatEqual_MPISBAIJ, 1980 MatGetDiagonal_MPISBAIJ, 1981 MatDiagonalScale_MPISBAIJ, 1982 MatNorm_MPISBAIJ, 1983 MatAssemblyBegin_MPISBAIJ, 1984 MatAssemblyEnd_MPISBAIJ, 1985 0, 1986 MatSetOption_MPISBAIJ, 1987 MatZeroEntries_MPISBAIJ, 1988 MatZeroRows_MPISBAIJ, 1989 0, 1990 0, 1991 0, 1992 0, 1993 MatGetSize_MPISBAIJ, 1994 MatGetLocalSize_MPISBAIJ, 1995 MatGetOwnershipRange_MPISBAIJ, 1996 0, 1997 0, 1998 0, 1999 0, 2000 MatDuplicate_MPISBAIJ, 2001 0, 2002 0, 2003 0, 2004 0, 2005 0, 2006 MatGetSubMatrices_MPISBAIJ, 2007 MatIncreaseOverlap_MPISBAIJ, 2008 MatGetValues_MPISBAIJ, 2009 0, 2010 MatPrintHelp_MPISBAIJ, 2011 MatScale_MPISBAIJ, 2012 0, 2013 0, 2014 0, 2015 MatGetBlockSize_MPISBAIJ, 2016 0, 2017 0, 2018 0, 2019 0, 2020 0, 2021 0, 2022 MatSetUnfactored_MPISBAIJ, 2023 0, 2024 MatSetValuesBlocked_MPISBAIJ, 2025 0, 2026 0, 2027 0, 2028 MatGetMaps_Petsc}; 2029 2030 2031 EXTERN_C_BEGIN 2032 #undef __FUNC__ 2033 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ" 2034 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2035 { 2036 PetscFunctionBegin; 2037 *a = ((Mat_MPISBAIJ *)A->data)->A; 2038 *iscopy = PETSC_FALSE; 2039 PetscFunctionReturn(0); 2040 } 2041 EXTERN_C_END 2042 2043 #undef __FUNC__ 2044 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ" 2045 /*@C 2046 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2047 (block compressed row). For good matrix assembly performance 2048 the user should preallocate the matrix storage by setting the parameters 2049 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2050 performance can be increased by more than a factor of 50. 2051 2052 Collective on MPI_Comm 2053 2054 Input Parameters: 2055 + comm - MPI communicator 2056 . bs - size of blockk 2057 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2058 This value should be the same as the local size used in creating the 2059 y vector for the matrix-vector product y = Ax. 2060 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2061 This value should be the same as the local size used in creating the 2062 x vector for the matrix-vector product y = Ax. 2063 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2064 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2065 . d_nz - number of block nonzeros per block row in diagonal portion of local 2066 submatrix (same for all local rows) 2067 . d_nnz - array containing the number of block nonzeros in the various block rows 2068 of the in diagonal portion of the local (possibly different for each block 2069 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2070 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2071 submatrix (same for all local rows). 2072 - o_nnz - array containing the number of nonzeros in the various block rows of the 2073 off-diagonal portion of the local submatrix (possibly different for 2074 each block row) or PETSC_NULL. 2075 2076 Output Parameter: 2077 . A - the matrix 2078 2079 Options Database Keys: 2080 . -mat_no_unroll - uses code that does not unroll the loops in the 2081 block calculations (much slower) 2082 . -mat_block_size - size of the blocks to use 2083 . -mat_mpi - use the parallel matrix data structures even on one processor 2084 (defaults to using SeqBAIJ format on one processor) 2085 2086 Notes: 2087 The user MUST specify either the local or global matrix dimensions 2088 (possibly both). 2089 2090 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2091 than it must be used on all processors that share the object for that argument. 2092 2093 Storage Information: 2094 For a square global matrix we define each processor's diagonal portion 2095 to be its local rows and the corresponding columns (a square submatrix); 2096 each processor's off-diagonal portion encompasses the remainder of the 2097 local matrix (a rectangular submatrix). 2098 2099 The user can specify preallocated storage for the diagonal part of 2100 the local submatrix with either d_nz or d_nnz (not both). Set 2101 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2102 memory allocation. Likewise, specify preallocated storage for the 2103 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2104 2105 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2106 the figure below we depict these three local rows and all columns (0-11). 2107 2108 .vb 2109 0 1 2 3 4 5 6 7 8 9 10 11 2110 ------------------- 2111 row 3 | o o o d d d o o o o o o 2112 row 4 | o o o d d d o o o o o o 2113 row 5 | o o o d d d o o o o o o 2114 ------------------- 2115 .ve 2116 2117 Thus, any entries in the d locations are stored in the d (diagonal) 2118 submatrix, and any entries in the o locations are stored in the 2119 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2120 stored simply in the MATSEQBAIJ format for compressed row storage. 2121 2122 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2123 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2124 In general, for PDE problems in which most nonzeros are near the diagonal, 2125 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2126 or you will get TERRIBLE performance; see the users' manual chapter on 2127 matrices. 2128 2129 Level: intermediate 2130 2131 .keywords: matrix, block, aij, compressed row, sparse, parallel 2132 2133 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ() 2134 @*/ 2135 2136 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2137 { 2138 Mat B; 2139 Mat_MPISBAIJ *b; 2140 int ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size; 2141 PetscTruth flag1,flag2,flg; 2142 2143 PetscFunctionBegin; 2144 if (M != N || m != n){ /* N and n are not used after this */ 2145 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n"); 2146 } 2147 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2148 2149 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2150 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 2151 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 2152 if (d_nnz) { 2153 for (i=0; i<m/bs; i++) { 2154 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2155 } 2156 } 2157 if (o_nnz) { 2158 for (i=0; i<m/bs; i++) { 2159 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2160 } 2161 } 2162 2163 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2164 ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr); 2165 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2166 if (!flag1 && !flag2 && size == 1) { 2167 if (M == PETSC_DECIDE) M = m; 2168 ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr); 2169 PetscFunctionReturn(0); 2170 } 2171 2172 *A = 0; 2173 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView); 2174 PLogObjectCreate(B); 2175 B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b); 2176 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 2177 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2178 2179 B->ops->destroy = MatDestroy_MPISBAIJ; 2180 B->ops->view = MatView_MPISBAIJ; 2181 B->mapping = 0; 2182 B->factor = 0; 2183 B->assembled = PETSC_FALSE; 2184 2185 B->insertmode = NOT_SET_VALUES; 2186 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2187 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2188 2189 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2190 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2191 } 2192 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2193 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2194 } 2195 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2196 2197 if (M == PETSC_DECIDE) { 2198 work[0] = m; mbs = m/bs; 2199 ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2200 M = sum[0]; Mbs = M/bs; 2201 } else { /* M is specified */ 2202 Mbs = M/bs; 2203 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2204 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2205 m = mbs*bs; 2206 } 2207 2208 if (mbs*bs != m) { 2209 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize"); 2210 } 2211 2212 b->m = m; B->m = m; 2213 b->n = m; B->n = m; 2214 b->N = M; B->N = M; 2215 b->M = M; 2216 B->M = M; 2217 b->bs = bs; 2218 b->bs2 = bs*bs; 2219 b->mbs = mbs; 2220 b->nbs = mbs; 2221 b->Mbs = Mbs; 2222 b->Nbs = Mbs; 2223 2224 /* the information in the maps duplicates the information computed below, eventually 2225 we should remove the duplicate information that is not contained in the maps */ 2226 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 2227 ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr); 2228 2229 /* build local table of row and column ownerships */ 2230 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2231 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2232 b->cowners = b->rowners + b->size + 2; 2233 b->rowners_bs = b->cowners + b->size + 2; 2234 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2235 b->rowners[0] = 0; 2236 for (i=2; i<=b->size; i++) { 2237 b->rowners[i] += b->rowners[i-1]; 2238 } 2239 for (i=0; i<=b->size; i++) { 2240 b->rowners_bs[i] = b->rowners[i]*bs; 2241 } 2242 b->rstart = b->rowners[b->rank]; 2243 b->rend = b->rowners[b->rank+1]; 2244 b->rstart_bs = b->rstart * bs; 2245 b->rend_bs = b->rend * bs; 2246 2247 b->cstart = b->rstart; 2248 b->cend = b->rend; 2249 b->cstart_bs = b->cstart * bs; 2250 b->cend_bs = b->cend * bs; 2251 2252 2253 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2254 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2255 PLogObjectParent(B,b->A); 2256 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2257 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2258 PLogObjectParent(B,b->B); 2259 2260 /* build cache for off array entries formed */ 2261 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2262 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2263 b->donotstash = PETSC_FALSE; 2264 b->colmap = PETSC_NULL; 2265 b->garray = PETSC_NULL; 2266 b->roworiented = PETSC_TRUE; 2267 2268 #if defined(PEYSC_USE_MAT_SINGLE) 2269 /* stuff for MatSetValues_XXX in single precision */ 2270 b->lensetvalues = 0; 2271 b->setvaluescopy = PETSC_NULL; 2272 #endif 2273 2274 /* stuff used in block assembly */ 2275 b->barray = 0; 2276 2277 /* stuff used for matrix vector multiply */ 2278 b->lvec = 0; 2279 b->Mvctx = 0; 2280 2281 /* stuff for MatGetRow() */ 2282 b->rowindices = 0; 2283 b->rowvalues = 0; 2284 b->getrowactive = PETSC_FALSE; 2285 2286 /* hash table stuff */ 2287 b->ht = 0; 2288 b->hd = 0; 2289 b->ht_size = 0; 2290 b->ht_flag = PETSC_FALSE; 2291 b->ht_fact = 0; 2292 b->ht_total_ct = 0; 2293 b->ht_insert_ct = 0; 2294 2295 *A = B; 2296 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2297 if (flg) { 2298 double fact = 1.39; 2299 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2300 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2301 if (fact <= 1.0) fact = 1.39; 2302 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2303 PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 2304 } 2305 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2306 "MatStoreValues_MPISBAIJ", 2307 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2308 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2309 "MatRetrieveValues_MPISBAIJ", 2310 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2311 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2312 "MatGetDiagonalBlock_MPISBAIJ", 2313 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 2318 #undef __FUNC__ 2319 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ" 2320 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2321 { 2322 Mat mat; 2323 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2324 int ierr,len=0; 2325 PetscTruth flg; 2326 2327 PetscFunctionBegin; 2328 *newmat = 0; 2329 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2330 PLogObjectCreate(mat); 2331 mat->data = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a); 2332 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2333 mat->ops->destroy = MatDestroy_MPISBAIJ; 2334 mat->ops->view = MatView_MPISBAIJ; 2335 mat->factor = matin->factor; 2336 mat->assembled = PETSC_TRUE; 2337 mat->insertmode = NOT_SET_VALUES; 2338 2339 a->m = mat->m = oldmat->m; 2340 a->n = mat->n = oldmat->n; 2341 a->M = mat->M = oldmat->M; 2342 a->N = mat->N = oldmat->N; 2343 2344 a->bs = oldmat->bs; 2345 a->bs2 = oldmat->bs2; 2346 a->mbs = oldmat->mbs; 2347 a->nbs = oldmat->nbs; 2348 a->Mbs = oldmat->Mbs; 2349 a->Nbs = oldmat->Nbs; 2350 2351 a->rstart = oldmat->rstart; 2352 a->rend = oldmat->rend; 2353 a->cstart = oldmat->cstart; 2354 a->cend = oldmat->cend; 2355 a->size = oldmat->size; 2356 a->rank = oldmat->rank; 2357 a->donotstash = oldmat->donotstash; 2358 a->roworiented = oldmat->roworiented; 2359 a->rowindices = 0; 2360 a->rowvalues = 0; 2361 a->getrowactive = PETSC_FALSE; 2362 a->barray = 0; 2363 a->rstart_bs = oldmat->rstart_bs; 2364 a->rend_bs = oldmat->rend_bs; 2365 a->cstart_bs = oldmat->cstart_bs; 2366 a->cend_bs = oldmat->cend_bs; 2367 2368 /* hash table stuff */ 2369 a->ht = 0; 2370 a->hd = 0; 2371 a->ht_size = 0; 2372 a->ht_flag = oldmat->ht_flag; 2373 a->ht_fact = oldmat->ht_fact; 2374 a->ht_total_ct = 0; 2375 a->ht_insert_ct = 0; 2376 2377 2378 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2379 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2380 a->cowners = a->rowners + a->size + 2; 2381 a->rowners_bs = a->cowners + a->size + 2; 2382 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2383 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2384 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2385 if (oldmat->colmap) { 2386 #if defined (PETSC_USE_CTABLE) 2387 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2388 #else 2389 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2390 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2391 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2392 #endif 2393 } else a->colmap = 0; 2394 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2395 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2396 PLogObjectMemory(mat,len*sizeof(int)); 2397 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2398 } else a->garray = 0; 2399 2400 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2401 PLogObjectParent(mat,a->lvec); 2402 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2403 2404 PLogObjectParent(mat,a->Mvctx); 2405 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2406 PLogObjectParent(mat,a->A); 2407 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2408 PLogObjectParent(mat,a->B); 2409 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2410 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2411 if (flg) { 2412 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2413 } 2414 *newmat = mat; 2415 PetscFunctionReturn(0); 2416 } 2417 2418 #include "petscsys.h" 2419 2420 #undef __FUNC__ 2421 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ" 2422 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat) 2423 { 2424 Mat A; 2425 int i,nz,ierr,j,rstart,rend,fd; 2426 Scalar *vals,*buf; 2427 MPI_Comm comm = ((PetscObject)viewer)->comm; 2428 MPI_Status status; 2429 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2430 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2431 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2432 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2433 int dcount,kmax,k,nzcount,tmp; 2434 2435 PetscFunctionBegin; 2436 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2437 2438 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2439 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2440 if (!rank) { 2441 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2442 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2443 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2444 if (header[3] < 0) { 2445 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ"); 2446 } 2447 } 2448 2449 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2450 M = header[1]; N = header[2]; 2451 2452 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2453 2454 /* 2455 This code adds extra rows to make sure the number of rows is 2456 divisible by the blocksize 2457 */ 2458 Mbs = M/bs; 2459 extra_rows = bs - M + bs*(Mbs); 2460 if (extra_rows == bs) extra_rows = 0; 2461 else Mbs++; 2462 if (extra_rows &&!rank) { 2463 PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2464 } 2465 2466 /* determine ownership of all rows */ 2467 mbs = Mbs/size + ((Mbs % size) > rank); 2468 m = mbs * bs; 2469 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2470 browners = rowners + size + 1; 2471 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2472 rowners[0] = 0; 2473 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2474 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2475 rstart = rowners[rank]; 2476 rend = rowners[rank+1]; 2477 2478 /* distribute row lengths to all processors */ 2479 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2480 if (!rank) { 2481 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2482 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2483 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2484 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2485 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2486 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2487 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2488 } else { 2489 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2490 } 2491 2492 if (!rank) { 2493 /* calculate the number of nonzeros on each processor */ 2494 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2495 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2496 for (i=0; i<size; i++) { 2497 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2498 procsnz[i] += rowlengths[j]; 2499 } 2500 } 2501 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2502 2503 /* determine max buffer needed and allocate it */ 2504 maxnz = 0; 2505 for (i=0; i<size; i++) { 2506 maxnz = PetscMax(maxnz,procsnz[i]); 2507 } 2508 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2509 2510 /* read in my part of the matrix column indices */ 2511 nz = procsnz[0]; 2512 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2513 mycols = ibuf; 2514 if (size == 1) nz -= extra_rows; 2515 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2516 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2517 2518 /* read in every ones (except the last) and ship off */ 2519 for (i=1; i<size-1; i++) { 2520 nz = procsnz[i]; 2521 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2522 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2523 } 2524 /* read in the stuff for the last proc */ 2525 if (size != 1) { 2526 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2527 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2528 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2529 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2530 } 2531 ierr = PetscFree(cols);CHKERRQ(ierr); 2532 } else { 2533 /* determine buffer space needed for message */ 2534 nz = 0; 2535 for (i=0; i<m; i++) { 2536 nz += locrowlens[i]; 2537 } 2538 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2539 mycols = ibuf; 2540 /* receive message of column indices*/ 2541 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2542 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2543 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2544 } 2545 2546 /* loop over local rows, determining number of off diagonal entries */ 2547 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2548 odlens = dlens + (rend-rstart); 2549 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2550 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2551 masked1 = mask + Mbs; 2552 masked2 = masked1 + Mbs; 2553 rowcount = 0; nzcount = 0; 2554 for (i=0; i<mbs; i++) { 2555 dcount = 0; 2556 odcount = 0; 2557 for (j=0; j<bs; j++) { 2558 kmax = locrowlens[rowcount]; 2559 for (k=0; k<kmax; k++) { 2560 tmp = mycols[nzcount++]/bs; 2561 if (!mask[tmp]) { 2562 mask[tmp] = 1; 2563 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2564 else masked1[dcount++] = tmp; 2565 } 2566 } 2567 rowcount++; 2568 } 2569 2570 dlens[i] = dcount; 2571 odlens[i] = odcount; 2572 2573 /* zero out the mask elements we set */ 2574 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2575 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2576 } 2577 2578 /* create our matrix */ 2579 ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2580 A = *newmat; 2581 MatSetOption(A,MAT_COLUMNS_SORTED); 2582 2583 if (!rank) { 2584 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2585 /* read in my part of the matrix numerical values */ 2586 nz = procsnz[0]; 2587 vals = buf; 2588 mycols = ibuf; 2589 if (size == 1) nz -= extra_rows; 2590 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2591 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2592 2593 /* insert into matrix */ 2594 jj = rstart*bs; 2595 for (i=0; i<m; i++) { 2596 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2597 mycols += locrowlens[i]; 2598 vals += locrowlens[i]; 2599 jj++; 2600 } 2601 /* read in other processors (except the last one) and ship out */ 2602 for (i=1; i<size-1; i++) { 2603 nz = procsnz[i]; 2604 vals = buf; 2605 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2606 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2607 } 2608 /* the last proc */ 2609 if (size != 1){ 2610 nz = procsnz[i] - extra_rows; 2611 vals = buf; 2612 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2613 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2614 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2615 } 2616 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2617 } else { 2618 /* receive numeric values */ 2619 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2620 2621 /* receive message of values*/ 2622 vals = buf; 2623 mycols = ibuf; 2624 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2625 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2626 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2627 2628 /* insert into matrix */ 2629 jj = rstart*bs; 2630 for (i=0; i<m; i++) { 2631 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2632 mycols += locrowlens[i]; 2633 vals += locrowlens[i]; 2634 jj++; 2635 } 2636 } 2637 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2638 ierr = PetscFree(buf);CHKERRQ(ierr); 2639 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2640 ierr = PetscFree(rowners);CHKERRQ(ierr); 2641 ierr = PetscFree(dlens);CHKERRQ(ierr); 2642 ierr = PetscFree(mask);CHKERRQ(ierr); 2643 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2644 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2645 PetscFunctionReturn(0); 2646 } 2647 2648 #undef __FUNC__ 2649 #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor" 2650 /*@ 2651 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2652 2653 Input Parameters: 2654 . mat - the matrix 2655 . fact - factor 2656 2657 Collective on Mat 2658 2659 Level: advanced 2660 2661 Notes: 2662 This can also be set by the command line option: -mat_use_hash_table fact 2663 2664 .keywords: matrix, hashtable, factor, HT 2665 2666 .seealso: MatSetOption() 2667 @*/ 2668 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2669 { 2670 Mat_MPIBAIJ *baij; 2671 2672 PetscFunctionBegin; 2673 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2674 if (mat->type != MATMPIBAIJ) { 2675 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2676 } 2677 baij = (Mat_MPIBAIJ*)mat->data; 2678 baij->ht_fact = fact; 2679 PetscFunctionReturn(0); 2680 } 2681