1 /*$Id: mpisbaij.c,v 1.11 2000/08/03 15:03:00 balay Exp balay $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqSBAIJ(Mat); 18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*); 19 20 /* UGLY, ugly, ugly 21 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 22 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 23 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 24 converts the entries into single precision and then calls ..._MatScalar() to put them 25 into the single precision data structures. 26 */ 27 #if defined(PETSC_USE_MAT_SINGLE) 28 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 #else 34 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 35 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 36 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 37 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 38 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 39 #endif 40 41 EXTERN_C_BEGIN 42 #undef __FUNC__ 43 #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ" 44 int MatStoreValues_MPISBAIJ(Mat mat) 45 { 46 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 47 int ierr; 48 49 PetscFunctionBegin; 50 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 51 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 EXTERN_C_END 55 56 EXTERN_C_BEGIN 57 #undef __FUNC__ 58 #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ" 59 int MatRetrieveValues_MPISBAIJ(Mat mat) 60 { 61 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 62 int ierr; 63 64 PetscFunctionBegin; 65 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 66 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 71 /* 72 Local utility routine that creates a mapping from the global column 73 number to the local number in the off-diagonal part of the local 74 storage of the matrix. This is done in a non scable way since the 75 length of colmap equals the global matrix length. 76 */ 77 #undef __FUNC__ 78 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private" 79 static int CreateColmap_MPISBAIJ_Private(Mat mat) 80 { 81 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 82 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 83 int nbs = B->nbs,i,bs=B->bs,ierr; 84 85 PetscFunctionBegin; 86 #if defined (PETSC_USE_CTABLE) 87 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 88 for (i=0; i<nbs; i++){ 89 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 90 } 91 #else 92 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 93 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 94 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 95 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 96 #endif 97 PetscFunctionReturn(0); 98 } 99 100 #define CHUNKSIZE 10 101 102 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 103 { \ 104 \ 105 brow = row/bs; \ 106 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 107 rmax = aimax[brow]; nrow = ailen[brow]; \ 108 bcol = col/bs; \ 109 ridx = row % bs; cidx = col % bs; \ 110 low = 0; high = nrow; \ 111 while (high-low > 3) { \ 112 t = (low+high)/2; \ 113 if (rp[t] > bcol) high = t; \ 114 else low = t; \ 115 } \ 116 for (_i=low; _i<high; _i++) { \ 117 if (rp[_i] > bcol) break; \ 118 if (rp[_i] == bcol) { \ 119 bap = ap + bs2*_i + bs*cidx + ridx; \ 120 if (addv == ADD_VALUES) *bap += value; \ 121 else *bap = value; \ 122 goto a_noinsert; \ 123 } \ 124 } \ 125 if (a->nonew == 1) goto a_noinsert; \ 126 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 127 if (nrow >= rmax) { \ 128 /* there is no extra room in row, therefore enlarge */ \ 129 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 130 MatScalar *new_a; \ 131 \ 132 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 133 \ 134 /* malloc new storage space */ \ 135 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 136 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 137 new_j = (int*)(new_a + bs2*new_nz); \ 138 new_i = new_j + new_nz; \ 139 \ 140 /* copy over old data into new slots */ \ 141 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 142 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 143 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 144 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 145 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 146 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 148 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 149 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 150 /* free up old matrix storage */ \ 151 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 152 if (!a->singlemalloc) { \ 153 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 154 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 155 } \ 156 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 157 a->singlemalloc = PETSC_TRUE; \ 158 \ 159 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 160 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 161 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 162 a->s_maxnz += bs2*CHUNKSIZE; \ 163 a->reallocs++; \ 164 a->s_nz++; \ 165 } \ 166 N = nrow++ - 1; \ 167 /* shift up all the later entries in this row */ \ 168 for (ii=N; ii>=_i; ii--) { \ 169 rp[ii+1] = rp[ii]; \ 170 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 171 } \ 172 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 173 rp[_i] = bcol; \ 174 ap[bs2*_i + bs*cidx + ridx] = value; \ 175 a_noinsert:; \ 176 ailen[brow] = nrow; \ 177 } 178 #ifndef MatSetValues_SeqBAIJ_B_Private 179 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 180 { \ 181 brow = row/bs; \ 182 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 183 rmax = bimax[brow]; nrow = bilen[brow]; \ 184 bcol = col/bs; \ 185 ridx = row % bs; cidx = col % bs; \ 186 low = 0; high = nrow; \ 187 while (high-low > 3) { \ 188 t = (low+high)/2; \ 189 if (rp[t] > bcol) high = t; \ 190 else low = t; \ 191 } \ 192 for (_i=low; _i<high; _i++) { \ 193 if (rp[_i] > bcol) break; \ 194 if (rp[_i] == bcol) { \ 195 bap = ap + bs2*_i + bs*cidx + ridx; \ 196 if (addv == ADD_VALUES) *bap += value; \ 197 else *bap = value; \ 198 goto b_noinsert; \ 199 } \ 200 } \ 201 if (b->nonew == 1) goto b_noinsert; \ 202 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 203 if (nrow >= rmax) { \ 204 /* there is no extra room in row, therefore enlarge */ \ 205 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 206 MatScalar *new_a; \ 207 \ 208 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 209 \ 210 /* malloc new storage space */ \ 211 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 212 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 213 new_j = (int*)(new_a + bs2*new_nz); \ 214 new_i = new_j + new_nz; \ 215 \ 216 /* copy over old data into new slots */ \ 217 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 218 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 219 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 220 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 221 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 222 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 224 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 225 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 226 /* free up old matrix storage */ \ 227 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 228 if (!b->singlemalloc) { \ 229 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 230 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 231 } \ 232 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 233 b->singlemalloc = PETSC_TRUE; \ 234 \ 235 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 236 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 237 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 238 b->maxnz += bs2*CHUNKSIZE; \ 239 b->reallocs++; \ 240 b->nz++; \ 241 } \ 242 N = nrow++ - 1; \ 243 /* shift up all the later entries in this row */ \ 244 for (ii=N; ii>=_i; ii--) { \ 245 rp[ii+1] = rp[ii]; \ 246 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 247 } \ 248 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 249 rp[_i] = bcol; \ 250 ap[bs2*_i + bs*cidx + ridx] = value; \ 251 b_noinsert:; \ 252 bilen[brow] = nrow; \ 253 } 254 #endif 255 256 #if defined(PETSC_USE_MAT_SINGLE) 257 #undef __FUNC__ 258 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ" 259 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 260 { 261 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 262 int ierr,i,N = m*n; 263 MatScalar *vsingle; 264 265 PetscFunctionBegin; 266 if (N > b->setvalueslen) { 267 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 268 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 269 b->setvalueslen = N; 270 } 271 vsingle = b->setvaluescopy; 272 273 for (i=0; i<N; i++) { 274 vsingle[i] = v[i]; 275 } 276 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNC__ 281 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 282 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 283 { 284 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 285 int ierr,i,N = m*n*b->bs2; 286 MatScalar *vsingle; 287 288 PetscFunctionBegin; 289 if (N > b->setvalueslen) { 290 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 291 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 292 b->setvalueslen = N; 293 } 294 vsingle = b->setvaluescopy; 295 for (i=0; i<N; i++) { 296 vsingle[i] = v[i]; 297 } 298 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNC__ 303 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT" 304 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 305 { 306 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 307 int ierr,i,N = m*n; 308 MatScalar *vsingle; 309 310 PetscFunctionBegin; 311 if (N > b->setvalueslen) { 312 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 313 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 314 b->setvalueslen = N; 315 } 316 vsingle = b->setvaluescopy; 317 for (i=0; i<N; i++) { 318 vsingle[i] = v[i]; 319 } 320 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNC__ 325 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT" 326 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 327 { 328 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 329 int ierr,i,N = m*n*b->bs2; 330 MatScalar *vsingle; 331 332 PetscFunctionBegin; 333 if (N > b->setvalueslen) { 334 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 335 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 336 b->setvalueslen = N; 337 } 338 vsingle = b->setvaluescopy; 339 for (i=0; i<N; i++) { 340 vsingle[i] = v[i]; 341 } 342 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 #endif 346 347 /* Only add/insert a(i,j) with i<=j (blocks). 348 Any a(i,j) with i>j input by user is ingored. 349 */ 350 #undef __FUNC__ 351 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 352 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 353 { 354 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 355 MatScalar value; 356 int ierr,i,j,row,col; 357 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 358 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 359 int cend_orig=baij->cend_bs,bs=baij->bs; 360 361 /* Some Variables required in the macro */ 362 Mat A = baij->A; 363 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 364 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 365 MatScalar *aa=a->a; 366 367 Mat B = baij->B; 368 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 369 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 370 MatScalar *ba=b->a; 371 372 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 373 int low,high,t,ridx,cidx,bs2=a->bs2; 374 MatScalar *ap,*bap; 375 376 /* for stash */ 377 int n_loc, *in_loc=0; 378 MatScalar *v_loc=0; 379 380 PetscFunctionBegin; 381 382 if(!baij->donotstash){ 383 in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc); 384 v_loc = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc); 385 } 386 387 for (i=0; i<m; i++) { 388 if (im[i] < 0) continue; 389 #if defined(PETSC_USE_BOPT_g) 390 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 391 #endif 392 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 393 row = im[i] - rstart_orig; /* local row index */ 394 for (j=0; j<n; j++) { 395 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 396 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 397 col = in[j] - cstart_orig; /* local col index */ 398 brow = row/bs; bcol = col/bs; 399 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 400 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 401 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 402 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 403 } else if (in[j] < 0) continue; 404 #if defined(PETSC_USE_BOPT_g) 405 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 406 #endif 407 else { /* off-diag entry (B) */ 408 if (mat->was_assembled) { 409 if (!baij->colmap) { 410 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 411 } 412 #if defined (PETSC_USE_CTABLE) 413 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 414 col = col - 1 + in[j]%bs; 415 #else 416 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 417 #endif 418 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 419 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 420 col = in[j]; 421 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 422 B = baij->B; 423 b = (Mat_SeqBAIJ*)(B)->data; 424 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 425 ba=b->a; 426 } 427 } else col = in[j]; 428 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 429 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 430 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 431 } 432 } 433 } else { /* off processor entry */ 434 if (!baij->donotstash) { 435 n_loc = 0; 436 for (j=0; j<n; j++){ 437 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 438 in_loc[n_loc] = in[j]; 439 if (roworiented) { 440 v_loc[n_loc] = v[i*n+j]; 441 } else { 442 v_loc[n_loc] = v[j*m+i]; 443 } 444 n_loc++; 445 } 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 447 } 448 } 449 } 450 451 if(!baij->donotstash){ 452 ierr = PetscFree(in_loc);CHKERRQ(ierr); 453 ierr = PetscFree(v_loc);CHKERRQ(ierr); 454 } 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNC__ 459 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 460 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 461 { 462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 463 MatScalar *value,*barray=baij->barray; 464 int ierr,i,j,ii,jj,row,col; 465 int roworiented = baij->roworiented,rstart=baij->rstart ; 466 int rend=baij->rend,cstart=baij->cstart,stepval; 467 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 468 469 PetscFunctionBegin; 470 if(!barray) { 471 baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 472 } 473 474 if (roworiented) { 475 stepval = (n-1)*bs; 476 } else { 477 stepval = (m-1)*bs; 478 } 479 for (i=0; i<m; i++) { 480 if (im[i] < 0) continue; 481 #if defined(PETSC_USE_BOPT_g) 482 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 483 #endif 484 if (im[i] >= rstart && im[i] < rend) { 485 row = im[i] - rstart; 486 for (j=0; j<n; j++) { 487 /* If NumCol = 1 then a copy is not required */ 488 if ((roworiented) && (n == 1)) { 489 barray = v + i*bs2; 490 } else if((!roworiented) && (m == 1)) { 491 barray = v + j*bs2; 492 } else { /* Here a copy is required */ 493 if (roworiented) { 494 value = v + i*(stepval+bs)*bs + j*bs; 495 } else { 496 value = v + j*(stepval+bs)*bs + i*bs; 497 } 498 for (ii=0; ii<bs; ii++,value+=stepval) { 499 for (jj=0; jj<bs; jj++) { 500 *barray++ = *value++; 501 } 502 } 503 barray -=bs2; 504 } 505 506 if (in[j] >= cstart && in[j] < cend){ 507 col = in[j] - cstart; 508 ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509 } 510 else if (in[j] < 0) continue; 511 #if defined(PETSC_USE_BOPT_g) 512 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 513 #endif 514 else { 515 if (mat->was_assembled) { 516 if (!baij->colmap) { 517 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 518 } 519 520 #if defined(PETSC_USE_BOPT_g) 521 #if defined (PETSC_USE_CTABLE) 522 { int data; 523 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 524 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 525 } 526 #else 527 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 528 #endif 529 #endif 530 #if defined (PETSC_USE_CTABLE) 531 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 532 col = (col - 1)/bs; 533 #else 534 col = (baij->colmap[in[j]] - 1)/bs; 535 #endif 536 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 537 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 538 col = in[j]; 539 } 540 } 541 else col = in[j]; 542 ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 543 } 544 } 545 } else { 546 if (!baij->donotstash) { 547 if (roworiented) { 548 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 PetscFunctionReturn(0); 556 } 557 558 #define HASH_KEY 0.6180339887 559 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 560 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 562 #undef __FUNC__ 563 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar" 564 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 565 { 566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567 int ierr,i,j,row,col; 568 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 569 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 570 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 571 PetscReal tmp; 572 MatScalar **HD = baij->hd,value; 573 #if defined(PETSC_USE_BOPT_g) 574 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 575 #endif 576 577 PetscFunctionBegin; 578 579 for (i=0; i<m; i++) { 580 #if defined(PETSC_USE_BOPT_g) 581 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 582 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 583 #endif 584 row = im[i]; 585 if (row >= rstart_orig && row < rend_orig) { 586 for (j=0; j<n; j++) { 587 col = in[j]; 588 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 589 /* Look up into the Hash Table */ 590 key = (row/bs)*Nbs+(col/bs)+1; 591 h1 = HASH(size,key,tmp); 592 593 594 idx = h1; 595 #if defined(PETSC_USE_BOPT_g) 596 insert_ct++; 597 total_ct++; 598 if (HT[idx] != key) { 599 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 600 if (idx == size) { 601 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 602 if (idx == h1) { 603 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 604 } 605 } 606 } 607 #else 608 if (HT[idx] != key) { 609 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 610 if (idx == size) { 611 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 612 if (idx == h1) { 613 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 614 } 615 } 616 } 617 #endif 618 /* A HASH table entry is found, so insert the values at the correct address */ 619 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 620 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 621 } 622 } else { 623 if (!baij->donotstash) { 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 #if defined(PETSC_USE_BOPT_g) 633 baij->ht_total_ct = total_ct; 634 baij->ht_insert_ct = insert_ct; 635 #endif 636 PetscFunctionReturn(0); 637 } 638 639 #undef __FUNC__ 640 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 641 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 642 { 643 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 644 int ierr,i,j,ii,jj,row,col; 645 int roworiented = baij->roworiented,rstart=baij->rstart ; 646 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 647 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 648 PetscReal tmp; 649 MatScalar **HD = baij->hd,*baij_a; 650 MatScalar *v_t,*value; 651 #if defined(PETSC_USE_BOPT_g) 652 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 653 #endif 654 655 PetscFunctionBegin; 656 657 if (roworiented) { 658 stepval = (n-1)*bs; 659 } else { 660 stepval = (m-1)*bs; 661 } 662 for (i=0; i<m; i++) { 663 #if defined(PETSC_USE_BOPT_g) 664 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 665 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 666 #endif 667 row = im[i]; 668 v_t = v + i*bs2; 669 if (row >= rstart && row < rend) { 670 for (j=0; j<n; j++) { 671 col = in[j]; 672 673 /* Look up into the Hash Table */ 674 key = row*Nbs+col+1; 675 h1 = HASH(size,key,tmp); 676 677 idx = h1; 678 #if defined(PETSC_USE_BOPT_g) 679 total_ct++; 680 insert_ct++; 681 if (HT[idx] != key) { 682 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 683 if (idx == size) { 684 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 685 if (idx == h1) { 686 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 687 } 688 } 689 } 690 #else 691 if (HT[idx] != key) { 692 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 693 if (idx == size) { 694 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 695 if (idx == h1) { 696 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 697 } 698 } 699 } 700 #endif 701 baij_a = HD[idx]; 702 if (roworiented) { 703 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 704 /* value = v + (i*(stepval+bs)+j)*bs; */ 705 value = v_t; 706 v_t += bs; 707 if (addv == ADD_VALUES) { 708 for (ii=0; ii<bs; ii++,value+=stepval) { 709 for (jj=ii; jj<bs2; jj+=bs) { 710 baij_a[jj] += *value++; 711 } 712 } 713 } else { 714 for (ii=0; ii<bs; ii++,value+=stepval) { 715 for (jj=ii; jj<bs2; jj+=bs) { 716 baij_a[jj] = *value++; 717 } 718 } 719 } 720 } else { 721 value = v + j*(stepval+bs)*bs + i*bs; 722 if (addv == ADD_VALUES) { 723 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 724 for (jj=0; jj<bs; jj++) { 725 baij_a[jj] += *value++; 726 } 727 } 728 } else { 729 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 730 for (jj=0; jj<bs; jj++) { 731 baij_a[jj] = *value++; 732 } 733 } 734 } 735 } 736 } 737 } else { 738 if (!baij->donotstash) { 739 if (roworiented) { 740 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 741 } else { 742 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 743 } 744 } 745 } 746 } 747 #if defined(PETSC_USE_BOPT_g) 748 baij->ht_total_ct = total_ct; 749 baij->ht_insert_ct = insert_ct; 750 #endif 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNC__ 755 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ" 756 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 757 { 758 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 759 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 760 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 765 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 766 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 767 row = idxm[i] - bsrstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 770 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 771 if (idxn[j] >= bscstart && idxn[j] < bscend){ 772 col = idxn[j] - bscstart; 773 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!baij->colmap) { 776 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined (PETSC_USE_CTABLE) 779 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 780 data --; 781 #else 782 data = baij->colmap[idxn[j]/bs]-1; 783 #endif 784 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 785 else { 786 col = data + idxn[j]%bs; 787 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 788 } 789 } 790 } 791 } else { 792 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 793 } 794 } 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ" 800 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 801 { 802 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 803 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 804 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 805 int ierr; 806 PetscReal sum[2],*lnorm2; 807 808 PetscFunctionBegin; 809 if (baij->size == 1) { 810 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 811 } else { 812 if (type == NORM_FROBENIUS) { 813 lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2); 814 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 815 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 816 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 817 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 818 /* 819 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 820 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 821 */ 822 ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 823 /* 824 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 825 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 826 827 *norm = sqrt(sum[0] + 2*sum[1]); 828 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 829 } else { 830 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 831 } 832 } 833 PetscFunctionReturn(0); 834 } 835 836 /* 837 Creates the hash table, and sets the table 838 This table is created only once. 839 If new entried need to be added to the matrix 840 then the hash table has to be destroyed and 841 recreated. 842 */ 843 #undef __FUNC__ 844 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private" 845 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 846 { 847 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 848 Mat A = baij->A,B=baij->B; 849 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 850 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 851 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 852 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 853 int *HT,key; 854 MatScalar **HD; 855 PetscReal tmp; 856 #if defined(PETSC_USE_BOPT_g) 857 int ct=0,max=0; 858 #endif 859 860 PetscFunctionBegin; 861 baij->ht_size=(int)(factor*nz); 862 size = baij->ht_size; 863 864 if (baij->ht) { 865 PetscFunctionReturn(0); 866 } 867 868 /* Allocate Memory for Hash Table */ 869 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 870 baij->ht = (int*)(baij->hd + size); 871 HD = baij->hd; 872 HT = baij->ht; 873 874 875 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 876 877 878 /* Loop Over A */ 879 for (i=0; i<a->mbs; i++) { 880 for (j=ai[i]; j<ai[i+1]; j++) { 881 row = i+rstart; 882 col = aj[j]+cstart; 883 884 key = row*Nbs + col + 1; 885 h1 = HASH(size,key,tmp); 886 for (k=0; k<size; k++){ 887 if (HT[(h1+k)%size] == 0.0) { 888 HT[(h1+k)%size] = key; 889 HD[(h1+k)%size] = a->a + j*bs2; 890 break; 891 #if defined(PETSC_USE_BOPT_g) 892 } else { 893 ct++; 894 #endif 895 } 896 } 897 #if defined(PETSC_USE_BOPT_g) 898 if (k> max) max = k; 899 #endif 900 } 901 } 902 /* Loop Over B */ 903 for (i=0; i<b->mbs; i++) { 904 for (j=bi[i]; j<bi[i+1]; j++) { 905 row = i+rstart; 906 col = garray[bj[j]]; 907 key = row*Nbs + col + 1; 908 h1 = HASH(size,key,tmp); 909 for (k=0; k<size; k++){ 910 if (HT[(h1+k)%size] == 0.0) { 911 HT[(h1+k)%size] = key; 912 HD[(h1+k)%size] = b->a + j*bs2; 913 break; 914 #if defined(PETSC_USE_BOPT_g) 915 } else { 916 ct++; 917 #endif 918 } 919 } 920 #if defined(PETSC_USE_BOPT_g) 921 if (k> max) max = k; 922 #endif 923 } 924 } 925 926 /* Print Summary */ 927 #if defined(PETSC_USE_BOPT_g) 928 for (i=0,j=0; i<size; i++) { 929 if (HT[i]) {j++;} 930 } 931 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 932 #endif 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNC__ 937 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ" 938 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 939 { 940 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 941 int ierr,nstash,reallocs; 942 InsertMode addv; 943 944 PetscFunctionBegin; 945 if (baij->donotstash) { 946 PetscFunctionReturn(0); 947 } 948 949 /* make sure all processors are either in INSERTMODE or ADDMODE */ 950 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 951 if (addv == (ADD_VALUES|INSERT_VALUES)) { 952 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 953 } 954 mat->insertmode = addv; /* in case this processor had no cache */ 955 956 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 957 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 958 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 959 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 960 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 961 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNC__ 966 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ" 967 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 968 { 969 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 970 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 971 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 972 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 973 int *row,*col,other_disassembled; 974 PetscTruth r1,r2,r3; 975 MatScalar *val; 976 InsertMode addv = mat->insertmode; 977 int rank; 978 979 PetscFunctionBegin; 980 /* remove 2 line below later */ 981 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 982 983 if (!baij->donotstash) { 984 while (1) { 985 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 986 /* 987 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 988 PetscSynchronizedFlush(PETSC_COMM_WORLD); 989 */ 990 if (!flg) break; 991 992 for (i=0; i<n;) { 993 /* Now identify the consecutive vals belonging to the same row */ 994 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 995 if (j < n) ncols = j-i; 996 else ncols = n-i; 997 /* Now assemble all these values with a single function call */ 998 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 999 i = j; 1000 } 1001 } 1002 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1003 /* Now process the block-stash. Since the values are stashed column-oriented, 1004 set the roworiented flag to column oriented, and after MatSetValues() 1005 restore the original flags */ 1006 r1 = baij->roworiented; 1007 r2 = a->roworiented; 1008 r3 = b->roworiented; 1009 baij->roworiented = PETSC_FALSE; 1010 a->roworiented = PETSC_FALSE; 1011 b->roworiented = PETSC_FALSE; 1012 while (1) { 1013 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1014 if (!flg) break; 1015 1016 for (i=0; i<n;) { 1017 /* Now identify the consecutive vals belonging to the same row */ 1018 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1019 if (j < n) ncols = j-i; 1020 else ncols = n-i; 1021 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1022 i = j; 1023 } 1024 } 1025 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1026 baij->roworiented = r1; 1027 a->roworiented = r2; 1028 b->roworiented = r3; 1029 } 1030 1031 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1032 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1033 1034 /* determine if any processor has disassembled, if so we must 1035 also disassemble ourselfs, in order that we may reassemble. */ 1036 /* 1037 if nonzero structure of submatrix B cannot change then we know that 1038 no processor disassembled thus we can skip this stuff 1039 */ 1040 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1041 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1042 if (mat->was_assembled && !other_disassembled) { 1043 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 1044 } 1045 } 1046 1047 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1048 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); 1049 } 1050 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1051 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1052 1053 #if defined(PETSC_USE_BOPT_g) 1054 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1055 PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 1056 baij->ht_total_ct = 0; 1057 baij->ht_insert_ct = 0; 1058 } 1059 #endif 1060 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1061 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1062 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 1063 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 1064 } 1065 1066 if (baij->rowvalues) { 1067 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1068 baij->rowvalues = 0; 1069 } 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNC__ 1074 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket" 1075 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 1076 { 1077 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1078 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 1079 PetscTruth isascii,isdraw; 1080 Viewer sviewer; 1081 1082 PetscFunctionBegin; 1083 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1084 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1085 if (isascii) { 1086 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1087 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1088 MatInfo info; 1089 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1090 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1091 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1092 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1093 baij->bs,(int)info.memory);CHKERRQ(ierr); 1094 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1095 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1096 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1097 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1098 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 1099 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1102 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 } 1106 1107 if (isdraw) { 1108 Draw draw; 1109 PetscTruth isnull; 1110 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1111 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1112 } 1113 1114 if (size == 1) { 1115 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1116 } else { 1117 /* assemble the entire matrix onto first processor. */ 1118 Mat A; 1119 Mat_SeqSBAIJ *Aloc; 1120 Mat_SeqBAIJ *Bloc; 1121 int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1122 MatScalar *a; 1123 1124 if (!rank) { 1125 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1126 } else { 1127 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1128 } 1129 PLogObjectParent(mat,A); 1130 1131 /* copy over the A part */ 1132 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 1133 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1134 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1135 1136 for (i=0; i<mbs; i++) { 1137 rvals[0] = bs*(baij->rstart + i); 1138 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1139 for (j=ai[i]; j<ai[i+1]; j++) { 1140 col = (baij->cstart+aj[j])*bs; 1141 for (k=0; k<bs; k++) { 1142 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1143 col++; a += bs; 1144 } 1145 } 1146 } 1147 /* copy over the B part */ 1148 Bloc = (Mat_SeqBAIJ*)baij->B->data; 1149 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 1150 for (i=0; i<mbs; i++) { 1151 rvals[0] = bs*(baij->rstart + i); 1152 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1153 for (j=ai[i]; j<ai[i+1]; j++) { 1154 col = baij->garray[aj[j]]*bs; 1155 for (k=0; k<bs; k++) { 1156 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1157 col++; a += bs; 1158 } 1159 } 1160 } 1161 ierr = PetscFree(rvals);CHKERRQ(ierr); 1162 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1163 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1164 /* 1165 Everyone has to call to draw the matrix since the graphics waits are 1166 synchronized across all processors that share the Draw object 1167 */ 1168 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169 if (!rank) { 1170 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1171 } 1172 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1173 ierr = MatDestroy(A);CHKERRQ(ierr); 1174 } 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNC__ 1179 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ" 1180 int MatView_MPISBAIJ(Mat mat,Viewer viewer) 1181 { 1182 int ierr; 1183 PetscTruth isascii,isdraw,issocket,isbinary; 1184 1185 PetscFunctionBegin; 1186 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1187 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1188 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1189 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1190 if (isascii || isdraw || issocket || isbinary) { 1191 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1192 } else { 1193 SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNC__ 1199 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ" 1200 int MatDestroy_MPISBAIJ(Mat mat) 1201 { 1202 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1203 int ierr; 1204 1205 PetscFunctionBegin; 1206 1207 if (mat->mapping) { 1208 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1209 } 1210 if (mat->bmapping) { 1211 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1212 } 1213 if (mat->rmap) { 1214 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1215 } 1216 if (mat->cmap) { 1217 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1218 } 1219 #if defined(PETSC_USE_LOG) 1220 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 1221 #endif 1222 1223 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1224 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1225 1226 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1227 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1228 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1229 #if defined (PETSC_USE_CTABLE) 1230 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1231 #else 1232 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1233 #endif 1234 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1235 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1236 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1237 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1238 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1239 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1240 #if defined(PETSC_USE_MAT_SINGLE) 1241 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1242 #endif 1243 ierr = PetscFree(baij);CHKERRQ(ierr); 1244 PLogObjectDestroy(mat); 1245 PetscHeaderDestroy(mat); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNC__ 1250 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ" 1251 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1252 { 1253 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1254 int ierr,nt; 1255 1256 PetscFunctionBegin; 1257 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1258 if (nt != a->n) { 1259 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1260 } 1261 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1262 if (nt != a->m) { 1263 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1264 } 1265 1266 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1267 /* do diagonal part */ 1268 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1269 /* do supperdiagonal part */ 1270 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1271 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1272 /* do subdiagonal part */ 1273 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1274 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1275 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1276 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNC__ 1281 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ" 1282 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1283 { 1284 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1285 int ierr; 1286 1287 PetscFunctionBegin; 1288 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1289 /* do diagonal part */ 1290 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1291 /* do supperdiagonal part */ 1292 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1293 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1294 1295 /* do subdiagonal part */ 1296 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1297 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1298 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1299 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNC__ 1304 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ" 1305 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1306 { 1307 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1308 int ierr; 1309 1310 PetscFunctionBegin; 1311 /* do nondiagonal part */ 1312 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1313 /* send it on its way */ 1314 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1315 /* do local part */ 1316 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1317 /* receive remote parts: note this assumes the values are not actually */ 1318 /* inserted in yy until the next line, which is true for my implementation*/ 1319 /* but is not perhaps always true. */ 1320 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNC__ 1325 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ" 1326 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1327 { 1328 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329 int ierr; 1330 1331 PetscFunctionBegin; 1332 /* do nondiagonal part */ 1333 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1334 /* send it on its way */ 1335 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1336 /* do local part */ 1337 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1338 /* receive remote parts: note this assumes the values are not actually */ 1339 /* inserted in yy until the next line, which is true for my implementation*/ 1340 /* but is not perhaps always true. */ 1341 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 /* 1346 This only works correctly for square matrices where the subblock A->A is the 1347 diagonal block 1348 */ 1349 #undef __FUNC__ 1350 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ" 1351 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1352 { 1353 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1354 int ierr; 1355 1356 PetscFunctionBegin; 1357 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */ 1358 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNC__ 1363 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ" 1364 int MatScale_MPISBAIJ(Scalar *aa,Mat A) 1365 { 1366 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1367 int ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1371 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNC__ 1376 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ" 1377 int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n) 1378 { 1379 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1380 1381 PetscFunctionBegin; 1382 if (m) *m = mat->M; 1383 if (n) *n = mat->N; 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNC__ 1388 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ" 1389 int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n) 1390 { 1391 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1392 1393 PetscFunctionBegin; 1394 *m = mat->m; *n = mat->n; 1395 PetscFunctionReturn(0); 1396 } 1397 1398 #undef __FUNC__ 1399 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ" 1400 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n) 1401 { 1402 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1403 1404 PetscFunctionBegin; 1405 if (m) *m = mat->rstart*mat->bs; 1406 if (n) *n = mat->rend*mat->bs; 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNC__ 1411 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ" 1412 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1413 { 1414 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1415 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1416 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1417 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1418 int *cmap,*idx_p,cstart = mat->cstart; 1419 1420 PetscFunctionBegin; 1421 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1422 mat->getrowactive = PETSC_TRUE; 1423 1424 if (!mat->rowvalues && (idx || v)) { 1425 /* 1426 allocate enough space to hold information from the longest row. 1427 */ 1428 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1429 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1430 int max = 1,mbs = mat->mbs,tmp; 1431 for (i=0; i<mbs; i++) { 1432 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1433 if (max < tmp) { max = tmp; } 1434 } 1435 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1436 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1437 } 1438 1439 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1440 lrow = row - brstart; /* local row index */ 1441 1442 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1443 if (!v) {pvA = 0; pvB = 0;} 1444 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1445 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1446 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1447 nztot = nzA + nzB; 1448 1449 cmap = mat->garray; 1450 if (v || idx) { 1451 if (nztot) { 1452 /* Sort by increasing column numbers, assuming A and B already sorted */ 1453 int imark = -1; 1454 if (v) { 1455 *v = v_p = mat->rowvalues; 1456 for (i=0; i<nzB; i++) { 1457 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1458 else break; 1459 } 1460 imark = i; 1461 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1462 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1463 } 1464 if (idx) { 1465 *idx = idx_p = mat->rowindices; 1466 if (imark > -1) { 1467 for (i=0; i<imark; i++) { 1468 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1469 } 1470 } else { 1471 for (i=0; i<nzB; i++) { 1472 if (cmap[cworkB[i]/bs] < cstart) 1473 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1474 else break; 1475 } 1476 imark = i; 1477 } 1478 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1479 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1480 } 1481 } else { 1482 if (idx) *idx = 0; 1483 if (v) *v = 0; 1484 } 1485 } 1486 *nz = nztot; 1487 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1488 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1489 PetscFunctionReturn(0); 1490 } 1491 1492 #undef __FUNC__ 1493 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ" 1494 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1495 { 1496 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1497 1498 PetscFunctionBegin; 1499 if (baij->getrowactive == PETSC_FALSE) { 1500 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1501 } 1502 baij->getrowactive = PETSC_FALSE; 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNC__ 1507 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ" 1508 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1509 { 1510 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1511 1512 PetscFunctionBegin; 1513 *bs = baij->bs; 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNC__ 1518 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ" 1519 int MatZeroEntries_MPISBAIJ(Mat A) 1520 { 1521 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1522 int ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1526 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNC__ 1531 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ" 1532 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1533 { 1534 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1535 Mat A = a->A,B = a->B; 1536 int ierr; 1537 PetscReal isend[5],irecv[5]; 1538 1539 PetscFunctionBegin; 1540 info->block_size = (double)a->bs; 1541 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1542 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1543 isend[3] = info->memory; isend[4] = info->mallocs; 1544 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1545 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1546 isend[3] += info->memory; isend[4] += info->mallocs; 1547 if (flag == MAT_LOCAL) { 1548 info->nz_used = isend[0]; 1549 info->nz_allocated = isend[1]; 1550 info->nz_unneeded = isend[2]; 1551 info->memory = isend[3]; 1552 info->mallocs = isend[4]; 1553 } else if (flag == MAT_GLOBAL_MAX) { 1554 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1555 info->nz_used = irecv[0]; 1556 info->nz_allocated = irecv[1]; 1557 info->nz_unneeded = irecv[2]; 1558 info->memory = irecv[3]; 1559 info->mallocs = irecv[4]; 1560 } else if (flag == MAT_GLOBAL_SUM) { 1561 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1562 info->nz_used = irecv[0]; 1563 info->nz_allocated = irecv[1]; 1564 info->nz_unneeded = irecv[2]; 1565 info->memory = irecv[3]; 1566 info->mallocs = irecv[4]; 1567 } else { 1568 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1569 } 1570 info->rows_global = (double)a->M; 1571 info->columns_global = (double)a->N; 1572 info->rows_local = (double)a->m; 1573 info->columns_local = (double)a->N; 1574 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1575 info->fill_ratio_needed = 0; 1576 info->factor_mallocs = 0; 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNC__ 1581 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ" 1582 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1583 { 1584 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1585 int ierr; 1586 1587 PetscFunctionBegin; 1588 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1589 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1590 op == MAT_COLUMNS_UNSORTED || 1591 op == MAT_COLUMNS_SORTED || 1592 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1593 op == MAT_KEEP_ZEROED_ROWS || 1594 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1595 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1596 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1597 } else if (op == MAT_ROW_ORIENTED) { 1598 a->roworiented = PETSC_TRUE; 1599 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1600 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1601 } else if (op == MAT_ROWS_SORTED || 1602 op == MAT_ROWS_UNSORTED || 1603 op == MAT_SYMMETRIC || 1604 op == MAT_STRUCTURALLY_SYMMETRIC || 1605 op == MAT_YES_NEW_DIAGONALS || 1606 op == MAT_USE_HASH_TABLE) { 1607 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1608 } else if (op == MAT_COLUMN_ORIENTED) { 1609 a->roworiented = PETSC_FALSE; 1610 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1611 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1612 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1613 a->donotstash = PETSC_TRUE; 1614 } else if (op == MAT_NO_NEW_DIAGONALS) { 1615 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1616 } else if (op == MAT_USE_HASH_TABLE) { 1617 a->ht_flag = PETSC_TRUE; 1618 } else { 1619 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1620 } 1621 PetscFunctionReturn(0); 1622 } 1623 1624 #undef __FUNC__ 1625 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ(" 1626 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1627 { 1628 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1629 Mat_SeqBAIJ *Aloc; 1630 Mat B; 1631 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1632 int bs=baij->bs,mbs=baij->mbs; 1633 MatScalar *a; 1634 1635 PetscFunctionBegin; 1636 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1637 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1638 1639 /* copy over the A part */ 1640 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1641 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1642 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1643 1644 for (i=0; i<mbs; i++) { 1645 rvals[0] = bs*(baij->rstart + i); 1646 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1647 for (j=ai[i]; j<ai[i+1]; j++) { 1648 col = (baij->cstart+aj[j])*bs; 1649 for (k=0; k<bs; k++) { 1650 ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1651 col++; a += bs; 1652 } 1653 } 1654 } 1655 /* copy over the B part */ 1656 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1657 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1658 for (i=0; i<mbs; i++) { 1659 rvals[0] = bs*(baij->rstart + i); 1660 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1661 for (j=ai[i]; j<ai[i+1]; j++) { 1662 col = baij->garray[aj[j]]*bs; 1663 for (k=0; k<bs; k++) { 1664 ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1665 col++; a += bs; 1666 } 1667 } 1668 } 1669 ierr = PetscFree(rvals);CHKERRQ(ierr); 1670 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 1673 if (matout) { 1674 *matout = B; 1675 } else { 1676 PetscOps *Abops; 1677 MatOps Aops; 1678 1679 /* This isn't really an in-place transpose .... but free data structures from baij */ 1680 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1681 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1682 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1683 #if defined (PETSC_USE_CTABLE) 1684 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1685 #else 1686 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1687 #endif 1688 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1689 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1690 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1691 ierr = PetscFree(baij);CHKERRQ(ierr); 1692 1693 /* 1694 This is horrible, horrible code. We need to keep the 1695 A pointers for the bops and ops but copy everything 1696 else from C. 1697 */ 1698 Abops = A->bops; 1699 Aops = A->ops; 1700 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1701 A->bops = Abops; 1702 A->ops = Aops; 1703 1704 PetscHeaderDestroy(B); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNC__ 1710 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ" 1711 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1712 { 1713 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1714 Mat a = baij->A,b = baij->B; 1715 int ierr,s1,s2,s3; 1716 1717 PetscFunctionBegin; 1718 if (ll != rr) { 1719 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1720 } 1721 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1722 if (rr) { 1723 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1724 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1725 /* Overlap communication with computation. */ 1726 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1727 /*} if (ll) { */ 1728 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1729 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1730 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1731 /* } */ 1732 /* scale the diagonal block */ 1733 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1734 1735 /* if (rr) { */ 1736 /* Do a scatter end and then right scale the off-diagonal block */ 1737 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1738 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1739 } 1740 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNC__ 1745 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ" 1746 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag) 1747 { 1748 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1749 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1750 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1751 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1752 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1753 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1754 MPI_Comm comm = A->comm; 1755 MPI_Request *send_waits,*recv_waits; 1756 MPI_Status recv_status,*send_status; 1757 IS istmp; 1758 1759 PetscFunctionBegin; 1760 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1761 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1762 1763 /* first count number of contributors to each processor */ 1764 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1765 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1766 procs = nprocs + size; 1767 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1768 for (i=0; i<N; i++) { 1769 idx = rows[i]; 1770 found = 0; 1771 for (j=0; j<size; j++) { 1772 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1773 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1774 } 1775 } 1776 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1777 } 1778 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1779 1780 /* inform other processors of number of messages and max length*/ 1781 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1782 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1783 nmax = work[rank]; 1784 nrecvs = work[size+rank]; 1785 ierr = PetscFree(work);CHKERRQ(ierr); 1786 1787 /* post receives: */ 1788 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1789 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1790 for (i=0; i<nrecvs; i++) { 1791 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1792 } 1793 1794 /* do sends: 1795 1) starts[i] gives the starting index in svalues for stuff going to 1796 the ith processor 1797 */ 1798 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1799 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1800 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1801 starts[0] = 0; 1802 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1803 for (i=0; i<N; i++) { 1804 svalues[starts[owner[i]]++] = rows[i]; 1805 } 1806 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1807 1808 starts[0] = 0; 1809 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1810 count = 0; 1811 for (i=0; i<size; i++) { 1812 if (procs[i]) { 1813 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1814 } 1815 } 1816 ierr = PetscFree(starts);CHKERRQ(ierr); 1817 1818 base = owners[rank]*bs; 1819 1820 /* wait on receives */ 1821 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1822 source = lens + nrecvs; 1823 count = nrecvs; slen = 0; 1824 while (count) { 1825 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1826 /* unpack receives into our local space */ 1827 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1828 source[imdex] = recv_status.MPI_SOURCE; 1829 lens[imdex] = n; 1830 slen += n; 1831 count--; 1832 } 1833 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1834 1835 /* move the data into the send scatter */ 1836 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1837 count = 0; 1838 for (i=0; i<nrecvs; i++) { 1839 values = rvalues + i*nmax; 1840 for (j=0; j<lens[i]; j++) { 1841 lrows[count++] = values[j] - base; 1842 } 1843 } 1844 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1845 ierr = PetscFree(lens);CHKERRQ(ierr); 1846 ierr = PetscFree(owner);CHKERRQ(ierr); 1847 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1848 1849 /* actually zap the local rows */ 1850 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1851 PLogObjectParent(A,istmp); 1852 1853 /* 1854 Zero the required rows. If the "diagonal block" of the matrix 1855 is square and the user wishes to set the diagonal we use seperate 1856 code so that MatSetValues() is not called for each diagonal allocating 1857 new memory, thus calling lots of mallocs and slowing things down. 1858 1859 Contributed by: Mathew Knepley 1860 */ 1861 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1862 ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1863 if (diag && (l->A->M == l->A->N)) { 1864 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1865 } else if (diag) { 1866 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1867 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1868 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1869 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1870 } 1871 for (i=0; i<slen; i++) { 1872 row = lrows[i] + rstart_bs; 1873 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1874 } 1875 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1876 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1877 } else { 1878 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1879 } 1880 1881 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1882 ierr = PetscFree(lrows);CHKERRQ(ierr); 1883 1884 /* wait on sends */ 1885 if (nsends) { 1886 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1887 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1888 ierr = PetscFree(send_status);CHKERRQ(ierr); 1889 } 1890 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1891 ierr = PetscFree(svalues);CHKERRQ(ierr); 1892 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNC__ 1897 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ" 1898 int MatPrintHelp_MPISBAIJ(Mat A) 1899 { 1900 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1901 MPI_Comm comm = A->comm; 1902 static int called = 0; 1903 int ierr; 1904 1905 PetscFunctionBegin; 1906 if (!a->rank) { 1907 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1908 } 1909 if (called) {PetscFunctionReturn(0);} else called = 1; 1910 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1911 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNC__ 1916 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ" 1917 int MatSetUnfactored_MPISBAIJ(Mat A) 1918 { 1919 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1920 int ierr; 1921 1922 PetscFunctionBegin; 1923 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1928 1929 #undef __FUNC__ 1930 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ" 1931 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1932 { 1933 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1934 Mat a,b,c,d; 1935 PetscTruth flg; 1936 int ierr; 1937 1938 PetscFunctionBegin; 1939 if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1940 a = matA->A; b = matA->B; 1941 c = matB->A; d = matB->B; 1942 1943 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1944 if (flg == PETSC_TRUE) { 1945 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1946 } 1947 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 /* -------------------------------------------------------------------*/ 1952 static struct _MatOps MatOps_Values = { 1953 MatSetValues_MPISBAIJ, 1954 MatGetRow_MPISBAIJ, 1955 MatRestoreRow_MPISBAIJ, 1956 MatMult_MPISBAIJ, 1957 MatMultAdd_MPISBAIJ, 1958 MatMultTranspose_MPISBAIJ, 1959 MatMultTransposeAdd_MPISBAIJ, 1960 0, 1961 0, 1962 0, 1963 0, 1964 0, 1965 0, 1966 0, 1967 MatTranspose_MPISBAIJ, 1968 MatGetInfo_MPISBAIJ, 1969 MatEqual_MPISBAIJ, 1970 MatGetDiagonal_MPISBAIJ, 1971 MatDiagonalScale_MPISBAIJ, 1972 MatNorm_MPISBAIJ, 1973 MatAssemblyBegin_MPISBAIJ, 1974 MatAssemblyEnd_MPISBAIJ, 1975 0, 1976 MatSetOption_MPISBAIJ, 1977 MatZeroEntries_MPISBAIJ, 1978 MatZeroRows_MPISBAIJ, 1979 0, 1980 0, 1981 0, 1982 0, 1983 MatGetSize_MPISBAIJ, 1984 MatGetLocalSize_MPISBAIJ, 1985 MatGetOwnershipRange_MPISBAIJ, 1986 0, 1987 0, 1988 0, 1989 0, 1990 MatDuplicate_MPISBAIJ, 1991 0, 1992 0, 1993 0, 1994 0, 1995 0, 1996 MatGetSubMatrices_MPISBAIJ, 1997 MatIncreaseOverlap_MPISBAIJ, 1998 MatGetValues_MPISBAIJ, 1999 0, 2000 MatPrintHelp_MPISBAIJ, 2001 MatScale_MPISBAIJ, 2002 0, 2003 0, 2004 0, 2005 MatGetBlockSize_MPISBAIJ, 2006 0, 2007 0, 2008 0, 2009 0, 2010 0, 2011 0, 2012 MatSetUnfactored_MPISBAIJ, 2013 0, 2014 MatSetValuesBlocked_MPISBAIJ, 2015 0, 2016 0, 2017 0, 2018 MatGetMaps_Petsc}; 2019 2020 2021 EXTERN_C_BEGIN 2022 #undef __FUNC__ 2023 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ" 2024 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2025 { 2026 PetscFunctionBegin; 2027 *a = ((Mat_MPISBAIJ *)A->data)->A; 2028 *iscopy = PETSC_FALSE; 2029 PetscFunctionReturn(0); 2030 } 2031 EXTERN_C_END 2032 2033 #undef __FUNC__ 2034 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ" 2035 /*@C 2036 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2037 (block compressed row). For good matrix assembly performance 2038 the user should preallocate the matrix storage by setting the parameters 2039 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2040 performance can be increased by more than a factor of 50. 2041 2042 Collective on MPI_Comm 2043 2044 Input Parameters: 2045 + comm - MPI communicator 2046 . bs - size of blockk 2047 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2048 This value should be the same as the local size used in creating the 2049 y vector for the matrix-vector product y = Ax. 2050 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2051 This value should be the same as the local size used in creating the 2052 x vector for the matrix-vector product y = Ax. 2053 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2054 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2055 . d_nz - number of block nonzeros per block row in diagonal portion of local 2056 submatrix (same for all local rows) 2057 . d_nnz - array containing the number of block nonzeros in the various block rows 2058 of the in diagonal portion of the local (possibly different for each block 2059 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2060 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2061 submatrix (same for all local rows). 2062 - o_nnz - array containing the number of nonzeros in the various block rows of the 2063 off-diagonal portion of the local submatrix (possibly different for 2064 each block row) or PETSC_NULL. 2065 2066 Output Parameter: 2067 . A - the matrix 2068 2069 Options Database Keys: 2070 . -mat_no_unroll - uses code that does not unroll the loops in the 2071 block calculations (much slower) 2072 . -mat_block_size - size of the blocks to use 2073 . -mat_mpi - use the parallel matrix data structures even on one processor 2074 (defaults to using SeqBAIJ format on one processor) 2075 2076 Notes: 2077 The user MUST specify either the local or global matrix dimensions 2078 (possibly both). 2079 2080 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2081 than it must be used on all processors that share the object for that argument. 2082 2083 Storage Information: 2084 For a square global matrix we define each processor's diagonal portion 2085 to be its local rows and the corresponding columns (a square submatrix); 2086 each processor's off-diagonal portion encompasses the remainder of the 2087 local matrix (a rectangular submatrix). 2088 2089 The user can specify preallocated storage for the diagonal part of 2090 the local submatrix with either d_nz or d_nnz (not both). Set 2091 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2092 memory allocation. Likewise, specify preallocated storage for the 2093 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2094 2095 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2096 the figure below we depict these three local rows and all columns (0-11). 2097 2098 .vb 2099 0 1 2 3 4 5 6 7 8 9 10 11 2100 ------------------- 2101 row 3 | o o o d d d o o o o o o 2102 row 4 | o o o d d d o o o o o o 2103 row 5 | o o o d d d o o o o o o 2104 ------------------- 2105 .ve 2106 2107 Thus, any entries in the d locations are stored in the d (diagonal) 2108 submatrix, and any entries in the o locations are stored in the 2109 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2110 stored simply in the MATSEQBAIJ format for compressed row storage. 2111 2112 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2113 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2114 In general, for PDE problems in which most nonzeros are near the diagonal, 2115 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2116 or you will get TERRIBLE performance; see the users' manual chapter on 2117 matrices. 2118 2119 Level: intermediate 2120 2121 .keywords: matrix, block, aij, compressed row, sparse, parallel 2122 2123 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ() 2124 @*/ 2125 2126 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2127 { 2128 Mat B; 2129 Mat_MPISBAIJ *b; 2130 int ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size; 2131 PetscTruth flag1,flag2,flg; 2132 2133 PetscFunctionBegin; 2134 if (M != N || m != n){ /* N and n are not used after this */ 2135 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n"); 2136 } 2137 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2138 2139 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2140 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 2141 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 2142 if (d_nnz) { 2143 for (i=0; i<m/bs; i++) { 2144 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2145 } 2146 } 2147 if (o_nnz) { 2148 for (i=0; i<m/bs; i++) { 2149 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2150 } 2151 } 2152 2153 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2154 ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr); 2155 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2156 if (!flag1 && !flag2 && size == 1) { 2157 if (M == PETSC_DECIDE) M = m; 2158 ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 *A = 0; 2163 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView); 2164 PLogObjectCreate(B); 2165 B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b); 2166 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 2167 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2168 2169 B->ops->destroy = MatDestroy_MPISBAIJ; 2170 B->ops->view = MatView_MPISBAIJ; 2171 B->mapping = 0; 2172 B->factor = 0; 2173 B->assembled = PETSC_FALSE; 2174 2175 B->insertmode = NOT_SET_VALUES; 2176 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2177 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2178 2179 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2180 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2181 } 2182 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2183 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2184 } 2185 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2186 2187 if (M == PETSC_DECIDE) { 2188 work[0] = m; mbs = m/bs; 2189 ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2190 M = sum[0]; Mbs = M/bs; 2191 } else { /* M is specified */ 2192 Mbs = M/bs; 2193 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2194 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2195 m = mbs*bs; 2196 } 2197 2198 if (mbs*bs != m) { 2199 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize"); 2200 } 2201 2202 b->m = m; B->m = m; 2203 b->n = m; B->n = m; 2204 b->N = M; B->N = M; 2205 b->M = M; 2206 B->M = M; 2207 b->bs = bs; 2208 b->bs2 = bs*bs; 2209 b->mbs = mbs; 2210 b->nbs = mbs; 2211 b->Mbs = Mbs; 2212 b->Nbs = Mbs; 2213 2214 /* the information in the maps duplicates the information computed below, eventually 2215 we should remove the duplicate information that is not contained in the maps */ 2216 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 2217 ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr); 2218 2219 /* build local table of row and column ownerships */ 2220 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2221 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2222 b->cowners = b->rowners + b->size + 2; 2223 b->rowners_bs = b->cowners + b->size + 2; 2224 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2225 b->rowners[0] = 0; 2226 for (i=2; i<=b->size; i++) { 2227 b->rowners[i] += b->rowners[i-1]; 2228 } 2229 for (i=0; i<=b->size; i++) { 2230 b->rowners_bs[i] = b->rowners[i]*bs; 2231 } 2232 b->rstart = b->rowners[b->rank]; 2233 b->rend = b->rowners[b->rank+1]; 2234 b->rstart_bs = b->rstart * bs; 2235 b->rend_bs = b->rend * bs; 2236 2237 b->cstart = b->rstart; 2238 b->cend = b->rend; 2239 b->cstart_bs = b->cstart * bs; 2240 b->cend_bs = b->cend * bs; 2241 2242 2243 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2244 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2245 PLogObjectParent(B,b->A); 2246 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2247 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2248 PLogObjectParent(B,b->B); 2249 2250 /* build cache for off array entries formed */ 2251 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2252 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2253 b->donotstash = PETSC_FALSE; 2254 b->colmap = PETSC_NULL; 2255 b->garray = PETSC_NULL; 2256 b->roworiented = PETSC_TRUE; 2257 2258 #if defined(PEYSC_USE_MAT_SINGLE) 2259 /* stuff for MatSetValues_XXX in single precision */ 2260 b->lensetvalues = 0; 2261 b->setvaluescopy = PETSC_NULL; 2262 #endif 2263 2264 /* stuff used in block assembly */ 2265 b->barray = 0; 2266 2267 /* stuff used for matrix vector multiply */ 2268 b->lvec = 0; 2269 b->Mvctx = 0; 2270 2271 /* stuff for MatGetRow() */ 2272 b->rowindices = 0; 2273 b->rowvalues = 0; 2274 b->getrowactive = PETSC_FALSE; 2275 2276 /* hash table stuff */ 2277 b->ht = 0; 2278 b->hd = 0; 2279 b->ht_size = 0; 2280 b->ht_flag = PETSC_FALSE; 2281 b->ht_fact = 0; 2282 b->ht_total_ct = 0; 2283 b->ht_insert_ct = 0; 2284 2285 *A = B; 2286 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2287 if (flg) { 2288 double fact = 1.39; 2289 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2290 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2291 if (fact <= 1.0) fact = 1.39; 2292 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2293 PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 2294 } 2295 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2296 "MatStoreValues_MPISBAIJ", 2297 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2298 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2299 "MatRetrieveValues_MPISBAIJ", 2300 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2301 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2302 "MatGetDiagonalBlock_MPISBAIJ", 2303 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 2308 #undef __FUNC__ 2309 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ" 2310 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2311 { 2312 Mat mat; 2313 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2314 int ierr,len=0; 2315 PetscTruth flg; 2316 2317 PetscFunctionBegin; 2318 *newmat = 0; 2319 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2320 PLogObjectCreate(mat); 2321 mat->data = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a); 2322 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2323 mat->ops->destroy = MatDestroy_MPISBAIJ; 2324 mat->ops->view = MatView_MPISBAIJ; 2325 mat->factor = matin->factor; 2326 mat->assembled = PETSC_TRUE; 2327 mat->insertmode = NOT_SET_VALUES; 2328 2329 a->m = mat->m = oldmat->m; 2330 a->n = mat->n = oldmat->n; 2331 a->M = mat->M = oldmat->M; 2332 a->N = mat->N = oldmat->N; 2333 2334 a->bs = oldmat->bs; 2335 a->bs2 = oldmat->bs2; 2336 a->mbs = oldmat->mbs; 2337 a->nbs = oldmat->nbs; 2338 a->Mbs = oldmat->Mbs; 2339 a->Nbs = oldmat->Nbs; 2340 2341 a->rstart = oldmat->rstart; 2342 a->rend = oldmat->rend; 2343 a->cstart = oldmat->cstart; 2344 a->cend = oldmat->cend; 2345 a->size = oldmat->size; 2346 a->rank = oldmat->rank; 2347 a->donotstash = oldmat->donotstash; 2348 a->roworiented = oldmat->roworiented; 2349 a->rowindices = 0; 2350 a->rowvalues = 0; 2351 a->getrowactive = PETSC_FALSE; 2352 a->barray = 0; 2353 a->rstart_bs = oldmat->rstart_bs; 2354 a->rend_bs = oldmat->rend_bs; 2355 a->cstart_bs = oldmat->cstart_bs; 2356 a->cend_bs = oldmat->cend_bs; 2357 2358 /* hash table stuff */ 2359 a->ht = 0; 2360 a->hd = 0; 2361 a->ht_size = 0; 2362 a->ht_flag = oldmat->ht_flag; 2363 a->ht_fact = oldmat->ht_fact; 2364 a->ht_total_ct = 0; 2365 a->ht_insert_ct = 0; 2366 2367 2368 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2369 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2370 a->cowners = a->rowners + a->size + 2; 2371 a->rowners_bs = a->cowners + a->size + 2; 2372 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2373 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2374 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2375 if (oldmat->colmap) { 2376 #if defined (PETSC_USE_CTABLE) 2377 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2378 #else 2379 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2380 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2381 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2382 #endif 2383 } else a->colmap = 0; 2384 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2385 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2386 PLogObjectMemory(mat,len*sizeof(int)); 2387 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2388 } else a->garray = 0; 2389 2390 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2391 PLogObjectParent(mat,a->lvec); 2392 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2393 2394 PLogObjectParent(mat,a->Mvctx); 2395 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2396 PLogObjectParent(mat,a->A); 2397 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2398 PLogObjectParent(mat,a->B); 2399 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2400 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2401 if (flg) { 2402 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2403 } 2404 *newmat = mat; 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #include "petscsys.h" 2409 2410 #undef __FUNC__ 2411 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ" 2412 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat) 2413 { 2414 Mat A; 2415 int i,nz,ierr,j,rstart,rend,fd; 2416 Scalar *vals,*buf; 2417 MPI_Comm comm = ((PetscObject)viewer)->comm; 2418 MPI_Status status; 2419 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2420 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2421 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2422 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2423 int dcount,kmax,k,nzcount,tmp; 2424 2425 PetscFunctionBegin; 2426 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2427 2428 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2429 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2430 if (!rank) { 2431 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2432 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2433 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2434 if (header[3] < 0) { 2435 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ"); 2436 } 2437 } 2438 2439 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2440 M = header[1]; N = header[2]; 2441 2442 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2443 2444 /* 2445 This code adds extra rows to make sure the number of rows is 2446 divisible by the blocksize 2447 */ 2448 Mbs = M/bs; 2449 extra_rows = bs - M + bs*(Mbs); 2450 if (extra_rows == bs) extra_rows = 0; 2451 else Mbs++; 2452 if (extra_rows &&!rank) { 2453 PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2454 } 2455 2456 /* determine ownership of all rows */ 2457 mbs = Mbs/size + ((Mbs % size) > rank); 2458 m = mbs * bs; 2459 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2460 browners = rowners + size + 1; 2461 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2462 rowners[0] = 0; 2463 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2464 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2465 rstart = rowners[rank]; 2466 rend = rowners[rank+1]; 2467 2468 /* distribute row lengths to all processors */ 2469 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2470 if (!rank) { 2471 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2472 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2473 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2474 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2475 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2476 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2477 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2478 } else { 2479 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2480 } 2481 2482 if (!rank) { 2483 /* calculate the number of nonzeros on each processor */ 2484 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2485 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2486 for (i=0; i<size; i++) { 2487 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2488 procsnz[i] += rowlengths[j]; 2489 } 2490 } 2491 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2492 2493 /* determine max buffer needed and allocate it */ 2494 maxnz = 0; 2495 for (i=0; i<size; i++) { 2496 maxnz = PetscMax(maxnz,procsnz[i]); 2497 } 2498 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2499 2500 /* read in my part of the matrix column indices */ 2501 nz = procsnz[0]; 2502 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2503 mycols = ibuf; 2504 if (size == 1) nz -= extra_rows; 2505 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2506 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2507 2508 /* read in every ones (except the last) and ship off */ 2509 for (i=1; i<size-1; i++) { 2510 nz = procsnz[i]; 2511 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2512 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2513 } 2514 /* read in the stuff for the last proc */ 2515 if (size != 1) { 2516 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2517 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2518 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2519 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2520 } 2521 ierr = PetscFree(cols);CHKERRQ(ierr); 2522 } else { 2523 /* determine buffer space needed for message */ 2524 nz = 0; 2525 for (i=0; i<m; i++) { 2526 nz += locrowlens[i]; 2527 } 2528 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2529 mycols = ibuf; 2530 /* receive message of column indices*/ 2531 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2532 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2533 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2534 } 2535 2536 /* loop over local rows, determining number of off diagonal entries */ 2537 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2538 odlens = dlens + (rend-rstart); 2539 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2540 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2541 masked1 = mask + Mbs; 2542 masked2 = masked1 + Mbs; 2543 rowcount = 0; nzcount = 0; 2544 for (i=0; i<mbs; i++) { 2545 dcount = 0; 2546 odcount = 0; 2547 for (j=0; j<bs; j++) { 2548 kmax = locrowlens[rowcount]; 2549 for (k=0; k<kmax; k++) { 2550 tmp = mycols[nzcount++]/bs; 2551 if (!mask[tmp]) { 2552 mask[tmp] = 1; 2553 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2554 else masked1[dcount++] = tmp; 2555 } 2556 } 2557 rowcount++; 2558 } 2559 2560 dlens[i] = dcount; 2561 odlens[i] = odcount; 2562 2563 /* zero out the mask elements we set */ 2564 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2565 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2566 } 2567 2568 /* create our matrix */ 2569 ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2570 A = *newmat; 2571 MatSetOption(A,MAT_COLUMNS_SORTED); 2572 2573 if (!rank) { 2574 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2575 /* read in my part of the matrix numerical values */ 2576 nz = procsnz[0]; 2577 vals = buf; 2578 mycols = ibuf; 2579 if (size == 1) nz -= extra_rows; 2580 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2581 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2582 2583 /* insert into matrix */ 2584 jj = rstart*bs; 2585 for (i=0; i<m; i++) { 2586 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2587 mycols += locrowlens[i]; 2588 vals += locrowlens[i]; 2589 jj++; 2590 } 2591 /* read in other processors (except the last one) and ship out */ 2592 for (i=1; i<size-1; i++) { 2593 nz = procsnz[i]; 2594 vals = buf; 2595 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2596 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2597 } 2598 /* the last proc */ 2599 if (size != 1){ 2600 nz = procsnz[i] - extra_rows; 2601 vals = buf; 2602 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2603 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2604 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2605 } 2606 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2607 } else { 2608 /* receive numeric values */ 2609 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2610 2611 /* receive message of values*/ 2612 vals = buf; 2613 mycols = ibuf; 2614 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2615 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2616 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2617 2618 /* insert into matrix */ 2619 jj = rstart*bs; 2620 for (i=0; i<m; i++) { 2621 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2622 mycols += locrowlens[i]; 2623 vals += locrowlens[i]; 2624 jj++; 2625 } 2626 } 2627 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2628 ierr = PetscFree(buf);CHKERRQ(ierr); 2629 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2630 ierr = PetscFree(rowners);CHKERRQ(ierr); 2631 ierr = PetscFree(dlens);CHKERRQ(ierr); 2632 ierr = PetscFree(mask);CHKERRQ(ierr); 2633 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2634 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2635 PetscFunctionReturn(0); 2636 } 2637 2638 #undef __FUNC__ 2639 #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor" 2640 /*@ 2641 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2642 2643 Input Parameters: 2644 . mat - the matrix 2645 . fact - factor 2646 2647 Collective on Mat 2648 2649 Level: advanced 2650 2651 Notes: 2652 This can also be set by the command line option: -mat_use_hash_table fact 2653 2654 .keywords: matrix, hashtable, factor, HT 2655 2656 .seealso: MatSetOption() 2657 @*/ 2658 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2659 { 2660 Mat_MPIBAIJ *baij; 2661 2662 PetscFunctionBegin; 2663 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2664 if (mat->type != MATMPIBAIJ) { 2665 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2666 } 2667 baij = (Mat_MPIBAIJ*)mat->data; 2668 baij->ht_fact = fact; 2669 PetscFunctionReturn(0); 2670 } 2671