1 /*$Id: mpisbaij.c,v 1.1 2000/07/07 20:55:55 balay Exp balay $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqSBAIJ(Mat); 18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*); 19 20 /* UGLY, ugly, ugly 21 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 22 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 23 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 24 converts the entries into single precision and then calls ..._MatScalar() to put them 25 into the single precision data structures. 
26 */ 27 #if defined(PETSC_USE_MAT_SINGLE) 28 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 #else 34 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 35 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 36 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 37 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 38 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 39 #endif 40 41 EXTERN_C_BEGIN 42 #undef __FUNC__ 43 #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ" 44 int MatStoreValues_MPISBAIJ(Mat mat) 45 { 46 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 47 int ierr; 48 49 PetscFunctionBegin; 50 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 51 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 EXTERN_C_END 55 56 EXTERN_C_BEGIN 57 #undef __FUNC__ 58 #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ" 59 int MatRetrieveValues_MPISBAIJ(Mat mat) 60 { 61 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 62 int ierr; 63 64 PetscFunctionBegin; 65 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 66 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 71 /* 72 Local utility routine that creates a mapping from the global column 73 number to the local number in the off-diagonal part of the local 74 storage of the matrix. 
This is done in a non scable way since the 75 length of colmap equals the global matrix length. 76 */ 77 #undef __FUNC__ 78 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private" 79 static int CreateColmap_MPISBAIJ_Private(Mat mat) 80 { 81 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 82 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 83 int nbs = B->nbs,i,bs=B->bs,ierr; 84 85 PetscFunctionBegin; 86 #if defined (PETSC_USE_CTABLE) 87 ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 88 for (i=0; i<nbs; i++){ 89 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 90 } 91 #else 92 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 93 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 94 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 95 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 96 #endif 97 PetscFunctionReturn(0); 98 } 99 100 #define CHUNKSIZE 10 101 102 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 103 { \ 104 \ 105 brow = row/bs; \ 106 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 107 rmax = aimax[brow]; nrow = ailen[brow]; \ 108 bcol = col/bs; \ 109 ridx = row % bs; cidx = col % bs; \ 110 low = 0; high = nrow; \ 111 while (high-low > 3) { \ 112 t = (low+high)/2; \ 113 if (rp[t] > bcol) high = t; \ 114 else low = t; \ 115 } \ 116 for (_i=low; _i<high; _i++) { \ 117 if (rp[_i] > bcol) break; \ 118 if (rp[_i] == bcol) { \ 119 bap = ap + bs2*_i + bs*cidx + ridx; \ 120 if (addv == ADD_VALUES) *bap += value; \ 121 else *bap = value; \ 122 goto a_noinsert; \ 123 } \ 124 } \ 125 if (a->nonew == 1) goto a_noinsert; \ 126 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 127 if (nrow >= rmax) { \ 128 /* there is no extra room in row, therefore enlarge */ \ 129 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 130 MatScalar *new_a; \ 131 \ 132 if (a->nonew 
== -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 133 \ 134 /* malloc new storage space */ \ 135 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 136 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 137 new_j = (int*)(new_a + bs2*new_nz); \ 138 new_i = new_j + new_nz; \ 139 \ 140 /* copy over old data into new slots */ \ 141 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 142 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 143 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 144 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 145 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 146 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 148 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 149 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 150 /* free up old matrix storage */ \ 151 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 152 if (!a->singlemalloc) { \ 153 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 154 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 155 } \ 156 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 157 a->singlemalloc = PETSC_TRUE; \ 158 \ 159 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 160 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 161 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 162 a->s_maxnz += bs2*CHUNKSIZE; \ 163 a->reallocs++; \ 164 a->s_nz++; \ 165 } \ 166 N = nrow++ - 1; \ 167 /* shift up all the later entries in this row */ \ 168 for (ii=N; ii>=_i; ii--) { \ 169 rp[ii+1] = rp[ii]; \ 170 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 171 } \ 172 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 173 rp[_i] = bcol; \ 174 ap[bs2*_i 
+ bs*cidx + ridx] = value; \ 175 a_noinsert:; \ 176 ailen[brow] = nrow; \ 177 } 178 #ifndef MatSetValues_SeqBAIJ_B_Private 179 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 180 { \ 181 brow = row/bs; \ 182 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 183 rmax = bimax[brow]; nrow = bilen[brow]; \ 184 bcol = col/bs; \ 185 ridx = row % bs; cidx = col % bs; \ 186 low = 0; high = nrow; \ 187 while (high-low > 3) { \ 188 t = (low+high)/2; \ 189 if (rp[t] > bcol) high = t; \ 190 else low = t; \ 191 } \ 192 for (_i=low; _i<high; _i++) { \ 193 if (rp[_i] > bcol) break; \ 194 if (rp[_i] == bcol) { \ 195 bap = ap + bs2*_i + bs*cidx + ridx; \ 196 if (addv == ADD_VALUES) *bap += value; \ 197 else *bap = value; \ 198 goto b_noinsert; \ 199 } \ 200 } \ 201 if (b->nonew == 1) goto b_noinsert; \ 202 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 203 if (nrow >= rmax) { \ 204 /* there is no extra room in row, therefore enlarge */ \ 205 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 206 MatScalar *new_a; \ 207 \ 208 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 209 \ 210 /* malloc new storage space */ \ 211 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 212 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 213 new_j = (int*)(new_a + bs2*new_nz); \ 214 new_i = new_j + new_nz; \ 215 \ 216 /* copy over old data into new slots */ \ 217 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 218 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 219 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 220 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 221 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 222 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223 ierr = 
PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 224 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 225 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 226 /* free up old matrix storage */ \ 227 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 228 if (!b->singlemalloc) { \ 229 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 230 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 231 } \ 232 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 233 b->singlemalloc = PETSC_TRUE; \ 234 \ 235 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 236 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 237 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 238 b->maxnz += bs2*CHUNKSIZE; \ 239 b->reallocs++; \ 240 b->nz++; \ 241 } \ 242 N = nrow++ - 1; \ 243 /* shift up all the later entries in this row */ \ 244 for (ii=N; ii>=_i; ii--) { \ 245 rp[ii+1] = rp[ii]; \ 246 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 247 } \ 248 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 249 rp[_i] = bcol; \ 250 ap[bs2*_i + bs*cidx + ridx] = value; \ 251 b_noinsert:; \ 252 bilen[brow] = nrow; \ 253 } 254 #endif 255 256 #if defined(PETSC_USE_MAT_SINGLE) 257 #undef __FUNC__ 258 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ" 259 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 260 { 261 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 262 int ierr,i,N = m*n; 263 MatScalar *vsingle; 264 265 PetscFunctionBegin; 266 if (N > b->setvalueslen) { 267 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 268 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 269 b->setvalueslen = N; 270 } 271 vsingle = b->setvaluescopy; 272 273 for (i=0; i<N; i++) { 274 vsingle[i] = v[i]; 275 } 276 ierr = 
MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNC__ 281 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 282 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 283 { 284 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 285 int ierr,i,N = m*n*b->bs2; 286 MatScalar *vsingle; 287 288 PetscFunctionBegin; 289 if (N > b->setvalueslen) { 290 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 291 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 292 b->setvalueslen = N; 293 } 294 vsingle = b->setvaluescopy; 295 for (i=0; i<N; i++) { 296 vsingle[i] = v[i]; 297 } 298 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNC__ 303 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT" 304 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 305 { 306 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 307 int ierr,i,N = m*n; 308 MatScalar *vsingle; 309 310 PetscFunctionBegin; 311 if (N > b->setvalueslen) { 312 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 313 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 314 b->setvalueslen = N; 315 } 316 vsingle = b->setvaluescopy; 317 for (i=0; i<N; i++) { 318 vsingle[i] = v[i]; 319 } 320 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNC__ 325 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT" 326 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 327 { 328 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 329 int ierr,i,N 
= m*n*b->bs2; 330 MatScalar *vsingle; 331 332 PetscFunctionBegin; 333 if (N > b->setvalueslen) { 334 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 335 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 336 b->setvalueslen = N; 337 } 338 vsingle = b->setvaluescopy; 339 for (i=0; i<N; i++) { 340 vsingle[i] = v[i]; 341 } 342 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 #endif 346 347 /* Only add/insert a(i,j) with i<=j (blocks). 348 Any a(i,j) with i>j input by user is ingored. 349 */ 350 #undef __FUNC__ 351 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 352 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 353 { 354 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 355 MatScalar value; 356 int ierr,i,j,row,col; 357 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 358 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 359 int cend_orig=baij->cend_bs,bs=baij->bs; 360 361 /* Some Variables required in the macro */ 362 Mat A = baij->A; 363 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 364 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 365 MatScalar *aa=a->a; 366 367 Mat B = baij->B; 368 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 369 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 370 MatScalar *ba=b->a; 371 372 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 373 int low,high,t,ridx,cidx,bs2=a->bs2; 374 MatScalar *ap,*bap; 375 376 /* for stash */ 377 int n_loc, *in_loc; 378 MatScalar *v_loc; 379 380 PetscFunctionBegin; 381 382 if(!baij->donotstash){ 383 in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc); 384 v_loc = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc); 385 } 386 387 for (i=0; i<m; i++) { 388 if (im[i] < 0) continue; 389 #if defined(PETSC_USE_BOPT_g) 390 if (im[i] >= baij->M) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;           /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue; /* ignore low triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */
          col = in[j] - cstart_orig;       /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* once assembled, global columns must be translated to local B
               columns through colmap (built lazily) */
            if (!baij->colmap) {
              ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col = col - 1 + in[j]%bs;   /* colmap stores 1-based block offsets */
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: fall back to disassembled form */
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry: stash it, dropping lower-triangle blocks */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore low triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if(!baij->donotstash){
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
/*
   MatSetValuesBlocked_MPISBAIJ_MatScalar - blocked insertion: each (im[i],in[j])
   addresses one bs x bs block.  Blocks for locally owned block rows go to A or
   B; others are stashed whole.
   NOTE(review): unlike the scalar path above, no lower-triangular filtering is
   done here — presumably handled in the Seq routines; confirm.
*/
int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocated bs x bs scratch block used when a copy is unavoidable */
  if(!barray) {
    baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
  }

  /* stride between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
            /* sanity check: colmap entries must be block aligned */
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* NOTE(review): A is SeqSBAIJ but nonew is read through a
               Mat_SeqBAIJ cast here (the scalar path casts Mat_SeqSBAIJ) —
               relies on the field offsets matching; confirm */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hash on the golden ratio; tmp is caller-supplied scratch.
   NOTE: HASH evaluates key more than once — do not pass expressions with
   side effects. */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ /*<a
name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
/*
   MatSetValues_MPISBAIJ_HT_MatScalar - scalar insertion via the precomputed
   hash table (no new nonzeros allowed): for each locally owned entry the
   (block row, block col) key is hashed and probed linearly (wrapping once);
   a miss is an error.  Off-process entries are stashed.
*/
int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;   /* HD[idx] -> start of the stored block */
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; /* probe statistics */
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key identifies the (block row, block col)
           pair; +1 so that 0 can mean "empty slot" */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe from h1 to end, then wrap from 0 to h1 */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPISBAIJ_HT_MatScalar - blocked variant of the hash
   table insertion path: keys are (block row)*Nbs + block col + 1 and the
   whole bs x bs block is scattered into the stored column-major block.
*/
int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;   /* running pointer used in the row-oriented path below */
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
if (idx == h1) { 686 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 687 } 688 } 689 } 690 #else 691 if (HT[idx] != key) { 692 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 693 if (idx == size) { 694 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 695 if (idx == h1) { 696 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 697 } 698 } 699 } 700 #endif 701 baij_a = HD[idx]; 702 if (roworiented) { 703 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 704 /* value = v + (i*(stepval+bs)+j)*bs; */ 705 value = v_t; 706 v_t += bs; 707 if (addv == ADD_VALUES) { 708 for (ii=0; ii<bs; ii++,value+=stepval) { 709 for (jj=ii; jj<bs2; jj+=bs) { 710 baij_a[jj] += *value++; 711 } 712 } 713 } else { 714 for (ii=0; ii<bs; ii++,value+=stepval) { 715 for (jj=ii; jj<bs2; jj+=bs) { 716 baij_a[jj] = *value++; 717 } 718 } 719 } 720 } else { 721 value = v + j*(stepval+bs)*bs + i*bs; 722 if (addv == ADD_VALUES) { 723 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 724 for (jj=0; jj<bs; jj++) { 725 baij_a[jj] += *value++; 726 } 727 } 728 } else { 729 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 730 for (jj=0; jj<bs; jj++) { 731 baij_a[jj] = *value++; 732 } 733 } 734 } 735 } 736 } 737 } else { 738 if (!baij->donotstash) { 739 if (roworiented) { 740 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 741 } else { 742 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 743 } 744 } 745 } 746 } 747 #if defined(PETSC_USE_BOPT_g) 748 baij->ht_total_ct = total_ct; 749 baij->ht_insert_ct = insert_ct; 750 #endif 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNC__ 755 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ" 756 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 757 { 758 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 759 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 760 int bscstart = 
baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 765 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 766 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 767 row = idxm[i] - bsrstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 770 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 771 if (idxn[j] >= bscstart && idxn[j] < bscend){ 772 col = idxn[j] - bscstart; 773 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!baij->colmap) { 776 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined (PETSC_USE_CTABLE) 779 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 780 data --; 781 #else 782 data = baij->colmap[idxn[j]/bs]-1; 783 #endif 784 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 785 else { 786 col = data + idxn[j]%bs; 787 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 788 } 789 } 790 } 791 } else { 792 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 793 } 794 } 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNC__ 799 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ" 800 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 801 { 802 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 803 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; 804 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; 805 int ierr; 806 PetscReal sum[2],*lnorm2; 807 808 PetscFunctionBegin; 809 if (baij->size == 1) { 810 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 811 } else { 812 if (type == NORM_FROBENIUS) { 813 lnorm2 = (Scalar*)PetscMalloc(2*sizeof(Scalar));CHKPTRQ(lnorm2); 814 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 815 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; 
/* squar power of norm(A) */ 816 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 817 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 818 /* 819 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 820 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 821 */ 822 ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 823 /* 824 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 825 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 826 827 *norm = sqrt(sum[0] + 2*sum[1]); 828 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 829 } else { 830 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 831 } 832 } 833 PetscFunctionReturn(0); 834 } 835 836 /* 837 Creates the hash table, and sets the table 838 This table is created only once. 839 If new entried need to be added to the matrix 840 then the hash table has to be destroyed and 841 recreated. 842 */ 843 #undef __FUNC__ 844 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private" 845 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 846 { 847 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 848 Mat A = baij->A,B=baij->B; 849 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 850 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 851 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 852 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 853 int *HT,key; 854 MatScalar **HD; 855 PetscReal tmp; 856 #if defined(PETSC_USE_BOPT_g) 857 int ct=0,max=0; 858 #endif 859 860 PetscFunctionBegin; 861 baij->ht_size=(int)(factor*nz); 862 size = baij->ht_size; 863 864 if (baij->ht) { 865 PetscFunctionReturn(0); 866 } 867 868 /* Allocate Memory for Hash Table */ 869 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 870 baij->ht = (int*)(baij->hd + size); 871 HD = baij->hd; 872 HT = baij->ht; 873 874 
875 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 876 877 878 /* Loop Over A */ 879 for (i=0; i<a->mbs; i++) { 880 for (j=ai[i]; j<ai[i+1]; j++) { 881 row = i+rstart; 882 col = aj[j]+cstart; 883 884 key = row*Nbs + col + 1; 885 h1 = HASH(size,key,tmp); 886 for (k=0; k<size; k++){ 887 if (HT[(h1+k)%size] == 0.0) { 888 HT[(h1+k)%size] = key; 889 HD[(h1+k)%size] = a->a + j*bs2; 890 break; 891 #if defined(PETSC_USE_BOPT_g) 892 } else { 893 ct++; 894 #endif 895 } 896 } 897 #if defined(PETSC_USE_BOPT_g) 898 if (k> max) max = k; 899 #endif 900 } 901 } 902 /* Loop Over B */ 903 for (i=0; i<b->mbs; i++) { 904 for (j=bi[i]; j<bi[i+1]; j++) { 905 row = i+rstart; 906 col = garray[bj[j]]; 907 key = row*Nbs + col + 1; 908 h1 = HASH(size,key,tmp); 909 for (k=0; k<size; k++){ 910 if (HT[(h1+k)%size] == 0.0) { 911 HT[(h1+k)%size] = key; 912 HD[(h1+k)%size] = b->a + j*bs2; 913 break; 914 #if defined(PETSC_USE_BOPT_g) 915 } else { 916 ct++; 917 #endif 918 } 919 } 920 #if defined(PETSC_USE_BOPT_g) 921 if (k> max) max = k; 922 #endif 923 } 924 } 925 926 /* Print Summary */ 927 #if defined(PETSC_USE_BOPT_g) 928 for (i=0,j=0; i<size; i++) { 929 if (HT[i]) {j++;} 930 } 931 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 
0.0:((PetscReal)(ct+j))/j,max); 932 #endif 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNC__ 937 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ" 938 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 939 { 940 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 941 int ierr,nstash,reallocs; 942 InsertMode addv; 943 944 PetscFunctionBegin; 945 if (baij->donotstash) { 946 PetscFunctionReturn(0); 947 } 948 949 /* make sure all processors are either in INSERTMODE or ADDMODE */ 950 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 951 if (addv == (ADD_VALUES|INSERT_VALUES)) { 952 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 953 } 954 mat->insertmode = addv; /* in case this processor had no cache */ 955 956 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 957 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 958 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 959 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 960 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 961 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNC__ 966 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ" 967 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 968 { 969 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 970 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 971 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 972 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 973 int *row,*col,other_disassembled; 974 PetscTruth r1,r2,r3; 975 MatScalar *val; 976 InsertMode addv = mat->insertmode; 977 int rank; 978 979 PetscFunctionBegin; 980 /* remove 2 line below later */ 981 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 

  if (!baij->donotstash) {
    /* drain the scalar stash: receive off-process entries and insert them
       locally, batching consecutive entries that share a row */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
      PetscSynchronizedFlush(PETSC_COMM_WORLD);
      */
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* blocks are bs2 scalars each, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    /* report and reset hash-table search statistics gathered during SetValues */
    PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on final assembly, optionally build the hash table and switch the
     SetValues kernels to the hash-table-based fast paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  /* scratch space used by MatGetRow; sizes may change after reassembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
/* Viewer back end shared by the ASCII, draw, socket and binary paths. */
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr =
PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-process nonzero/memory statistics for the whole matrix and for
         the on- and off-diagonal parts separately */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* rank 0 owns the whole gathered matrix; every other rank contributes
       zero local rows */
    if (!rank) {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,M,M,M,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,M,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part; rvals holds the bs global point-row indices of
       block-row i */
    Aloc  = (Mat_SeqBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* blocks are stored column-oriented: one column of bs values per call */
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part (off-diagonal; garray maps to global block columns) */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
/* Dispatches viewing to the shared back end for all supported viewer types. */
int MatView_MPISBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
/* Frees all parallel SBAIJ data structures and the matrix header itself. */
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr =
MatDestroy(baij->B);CHKERRQ(ierr); 1228 #if defined (PETSC_USE_CTABLE) 1229 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1230 #else 1231 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1232 #endif 1233 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1234 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1235 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1236 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1237 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1238 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1239 #if defined(PETSC_USE_MAT_SINGLE) 1240 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1241 #endif 1242 ierr = PetscFree(baij);CHKERRQ(ierr); 1243 PLogObjectDestroy(mat); 1244 PetscHeaderDestroy(mat); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNC__ 1249 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ" 1250 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1251 { 1252 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1253 int ierr,nt; 1254 1255 PetscFunctionBegin; 1256 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1257 if (nt != a->n) { 1258 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1259 } 1260 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1261 if (nt != a->m) { 1262 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1263 } 1264 /* do subdiagonal part */ 1265 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1266 /* send it on its way */ 1267 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1268 /* do local part */ 1269 ierr = (*a->B->ops->mult)(a->B,xx,yy);CHKERRQ(ierr); 1270 ierr = (*a->A->ops->multadd)(a->A,xx,yy,yy);CHKERRQ(ierr); 1271 /* receive remote parts */ 1272 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1273 1274 
PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNC__ 1278 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ" 1279 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1280 { 1281 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1282 int ierr; 1283 1284 PetscFunctionBegin; 1285 /* do subdiagonal part */ 1286 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1287 /* send it on its way */ 1288 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1289 /* do local part */ 1290 ierr = (*a->B->ops->multadd)(a->B,xx,yy,zz);CHKERRQ(ierr); 1291 ierr = (*a->A->ops->multadd)(a->A,xx,zz,zz);CHKERRQ(ierr); 1292 /* receive remote parts */ 1293 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1294 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNC__ 1299 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ" 1300 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1301 { 1302 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1303 int ierr; 1304 1305 PetscFunctionBegin; 1306 /* do nondiagonal part */ 1307 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1308 /* send it on its way */ 1309 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1310 /* do local part */ 1311 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1312 /* receive remote parts: note this assumes the values are not actually */ 1313 /* inserted in yy until the next line, which is true for my implementation*/ 1314 /* but is not perhaps always true. 
*/
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ"
/* z = y + A^T*x, overlapping the reverse scatter with the local product. */
int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ"
int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ"
/* Scales the matrix by *aa: both diagonal (A) and off-diagonal (B) parts. */
int MatScale_MPISBAIJ(Scalar *aa,Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ"
/* Returns the global dimensions; either output pointer may be NULL. */
int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ"
/* Returns the local dimensions; unlike MatGetSize, both pointers are required. */
int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ"
/* Returns this process's [first, one-past-last) global point-row range,
   converted from block rows via the block size. */
int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart*mat->bs;
  if (n) *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ"
/*
  Returns one locally-owned global row, merging the diagonal (A) and
  off-diagonal (B) contributions in increasing global column order.
  Only rows owned by this process may be requested.
*/
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  Scalar       *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int          max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then the index array behind them */
    mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;  /* local row index */

  /* NULL out the work pointers the caller did not ask for so the sequential
     getrow routines skip producing them */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column lies before the
         diagonal block; those come first, then all of A, then the rest of B */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be computed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ"
/* Ends a MatGetRow access; the scratch buffers are reused, not freed. */
int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ"
/* Returns the block size of the matrix. */
int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ"
/* Zeros all stored values (nonzero structure is kept) in both local parts. */
int MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ"
/* Gathers statistics for the local parts and reduces them per `flag`. */
int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
  Mat          A = a->A,B = a->B;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (double)a->bs;
  /* accumulate A's and B's local statistics into isend[] */
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] +=
info->nz_unneeded; 1541 isend[3] += info->memory; isend[4] += info->mallocs; 1542 if (flag == MAT_LOCAL) { 1543 info->nz_used = isend[0]; 1544 info->nz_allocated = isend[1]; 1545 info->nz_unneeded = isend[2]; 1546 info->memory = isend[3]; 1547 info->mallocs = isend[4]; 1548 } else if (flag == MAT_GLOBAL_MAX) { 1549 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1550 info->nz_used = irecv[0]; 1551 info->nz_allocated = irecv[1]; 1552 info->nz_unneeded = irecv[2]; 1553 info->memory = irecv[3]; 1554 info->mallocs = irecv[4]; 1555 } else if (flag == MAT_GLOBAL_SUM) { 1556 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1557 info->nz_used = irecv[0]; 1558 info->nz_allocated = irecv[1]; 1559 info->nz_unneeded = irecv[2]; 1560 info->memory = irecv[3]; 1561 info->mallocs = irecv[4]; 1562 } else { 1563 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1564 } 1565 info->rows_global = (double)a->M; 1566 info->columns_global = (double)a->N; 1567 info->rows_local = (double)a->m; 1568 info->columns_local = (double)a->N; 1569 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1570 info->fill_ratio_needed = 0; 1571 info->factor_mallocs = 0; 1572 PetscFunctionReturn(0); 1573 } 1574 1575 #undef __FUNC__ 1576 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ" 1577 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1578 { 1579 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1580 int ierr; 1581 1582 PetscFunctionBegin; 1583 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1584 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1585 op == MAT_COLUMNS_UNSORTED || 1586 op == MAT_COLUMNS_SORTED || 1587 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1588 op == MAT_KEEP_ZEROED_ROWS || 1589 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1590 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1591 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1592 } else if (op == MAT_ROW_ORIENTED) { 1593 a->roworiented = PETSC_TRUE; 1594 ierr = 
MatSetOption(a->A,op);CHKERRQ(ierr); 1595 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1596 } else if (op == MAT_ROWS_SORTED || 1597 op == MAT_ROWS_UNSORTED || 1598 op == MAT_SYMMETRIC || 1599 op == MAT_STRUCTURALLY_SYMMETRIC || 1600 op == MAT_YES_NEW_DIAGONALS || 1601 op == MAT_USE_HASH_TABLE) { 1602 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1603 } else if (op == MAT_COLUMN_ORIENTED) { 1604 a->roworiented = PETSC_FALSE; 1605 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1606 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1607 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1608 a->donotstash = PETSC_TRUE; 1609 } else if (op == MAT_NO_NEW_DIAGONALS) { 1610 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1611 } else if (op == MAT_USE_HASH_TABLE) { 1612 a->ht_flag = PETSC_TRUE; 1613 } else { 1614 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1615 } 1616 PetscFunctionReturn(0); 1617 } 1618 1619 #undef __FUNC__ 1620 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ(" 1621 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1622 { 1623 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1624 Mat_SeqBAIJ *Aloc; 1625 Mat B; 1626 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1627 int bs=baij->bs,mbs=baij->mbs; 1628 MatScalar *a; 1629 1630 PetscFunctionBegin; 1631 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1632 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1633 1634 /* copy over the A part */ 1635 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1636 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1637 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1638 1639 for (i=0; i<mbs; i++) { 1640 rvals[0] = bs*(baij->rstart + i); 1641 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1642 for (j=ai[i]; j<ai[i+1]; j++) { 1643 col = (baij->cstart+aj[j])*bs; 1644 for (k=0; k<bs; k++) { 1645 ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 
1646 col++; a += bs; 1647 } 1648 } 1649 } 1650 /* copy over the B part */ 1651 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1652 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1653 for (i=0; i<mbs; i++) { 1654 rvals[0] = bs*(baij->rstart + i); 1655 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1656 for (j=ai[i]; j<ai[i+1]; j++) { 1657 col = baij->garray[aj[j]]*bs; 1658 for (k=0; k<bs; k++) { 1659 ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1660 col++; a += bs; 1661 } 1662 } 1663 } 1664 ierr = PetscFree(rvals);CHKERRQ(ierr); 1665 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1666 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1667 1668 if (matout) { 1669 *matout = B; 1670 } else { 1671 PetscOps *Abops; 1672 MatOps Aops; 1673 1674 /* This isn't really an in-place transpose .... but free data structures from baij */ 1675 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1676 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1677 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1678 #if defined (PETSC_USE_CTABLE) 1679 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1680 #else 1681 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1682 #endif 1683 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1684 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1685 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1686 ierr = PetscFree(baij);CHKERRQ(ierr); 1687 1688 /* 1689 This is horrible, horrible code. We need to keep the 1690 A pointers for the bops and ops but copy everything 1691 else from C. 
1692 */ 1693 Abops = A->bops; 1694 Aops = A->ops; 1695 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1696 A->bops = Abops; 1697 A->ops = Aops; 1698 1699 PetscHeaderDestroy(B); 1700 } 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNC__ 1705 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ" 1706 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1707 { 1708 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1709 Mat a = baij->A,b = baij->B; 1710 int ierr,s1,s2,s3; 1711 1712 PetscFunctionBegin; 1713 if (ll != rr) { 1714 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1715 } 1716 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1717 if (rr) { 1718 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1719 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1720 /* Overlap communication with computation. */ 1721 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1722 /*} if (ll) { */ 1723 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1724 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1725 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1726 /* } */ 1727 /* scale the diagonal block */ 1728 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1729 1730 /* if (rr) { */ 1731 /* Do a scatter end and then right scale the off-diagonal block */ 1732 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1733 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1734 } 1735 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNC__ 1740 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ" 1741 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag) 1742 { 1743 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1744 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1745 int 
*procs,*nprocs,j,found,idx,nsends,*work,row;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  /* nprocs[j] = #rows destined for process j; procs[j] = 1 if any */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    /* owners[] holds block-row boundaries, hence the *bs conversion */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];        /* longest message this process will receive */
  nrecvs = work[size+rank];   /* number of messages this process will receive */
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts  = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* the bucketing above advanced starts[]; rebuild it before sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;   /* first global point-row owned by this process */

  /* wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert all received global row indices to local row indices */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

        Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: diagonal entries must be set one by one */
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ"
int MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  MPI_Comm     comm = A->comm;
  static int
called = 0; 1898 int ierr; 1899 1900 PetscFunctionBegin; 1901 if (!a->rank) { 1902 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1903 } 1904 if (called) {PetscFunctionReturn(0);} else called = 1; 1905 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1906 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNC__ 1911 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ" 1912 int MatSetUnfactored_MPISBAIJ(Mat A) 1913 { 1914 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1915 int ierr; 1916 1917 PetscFunctionBegin; 1918 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1923 1924 #undef __FUNC__ 1925 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ" 1926 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1927 { 1928 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1929 Mat a,b,c,d; 1930 PetscTruth flg; 1931 int ierr; 1932 1933 PetscFunctionBegin; 1934 if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1935 a = matA->A; b = matA->B; 1936 c = matB->A; d = matB->B; 1937 1938 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1939 if (flg == PETSC_TRUE) { 1940 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1941 } 1942 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 /* -------------------------------------------------------------------*/ 1947 static struct _MatOps MatOps_Values = { 1948 MatSetValues_MPISBAIJ, 1949 MatGetRow_MPISBAIJ, 1950 MatRestoreRow_MPISBAIJ, 1951 MatMult_MPISBAIJ, 1952 MatMultAdd_MPISBAIJ, 1953 MatMultTranspose_MPISBAIJ, 1954 MatMultTransposeAdd_MPISBAIJ, 1955 0, 1956 0, 1957 0, 1958 0, 1959 0, 1960 0, 1961 0, 1962 
MatTranspose_MPISBAIJ, 1963 MatGetInfo_MPISBAIJ, 1964 MatEqual_MPISBAIJ, 1965 MatGetDiagonal_MPISBAIJ, 1966 MatDiagonalScale_MPISBAIJ, 1967 MatNorm_MPISBAIJ, 1968 MatAssemblyBegin_MPISBAIJ, 1969 MatAssemblyEnd_MPISBAIJ, 1970 0, 1971 MatSetOption_MPISBAIJ, 1972 MatZeroEntries_MPISBAIJ, 1973 MatZeroRows_MPISBAIJ, 1974 0, 1975 0, 1976 0, 1977 0, 1978 MatGetSize_MPISBAIJ, 1979 MatGetLocalSize_MPISBAIJ, 1980 MatGetOwnershipRange_MPISBAIJ, 1981 0, 1982 0, 1983 0, 1984 0, 1985 MatDuplicate_MPISBAIJ, 1986 0, 1987 0, 1988 0, 1989 0, 1990 0, 1991 MatGetSubMatrices_MPISBAIJ, 1992 MatIncreaseOverlap_MPISBAIJ, 1993 MatGetValues_MPISBAIJ, 1994 0, 1995 MatPrintHelp_MPISBAIJ, 1996 MatScale_MPISBAIJ, 1997 0, 1998 0, 1999 0, 2000 MatGetBlockSize_MPISBAIJ, 2001 0, 2002 0, 2003 0, 2004 0, 2005 0, 2006 0, 2007 MatSetUnfactored_MPISBAIJ, 2008 0, 2009 MatSetValuesBlocked_MPISBAIJ, 2010 0, 2011 0, 2012 0, 2013 MatGetMaps_Petsc}; 2014 2015 2016 EXTERN_C_BEGIN 2017 #undef __FUNC__ 2018 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ" 2019 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2020 { 2021 PetscFunctionBegin; 2022 *a = ((Mat_MPISBAIJ *)A->data)->A; 2023 *iscopy = PETSC_FALSE; 2024 PetscFunctionReturn(0); 2025 } 2026 EXTERN_C_END 2027 2028 #undef __FUNC__ 2029 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ" 2030 /*@C 2031 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2032 (block compressed row). For good matrix assembly performance 2033 the user should preallocate the matrix storage by setting the parameters 2034 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2035 performance can be increased by more than a factor of 50. 2036 2037 Collective on MPI_Comm 2038 2039 Input Parameters: 2040 + comm - MPI communicator 2041 . bs - size of blockk 2042 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2043 This value should be the same as the local size used in creating the 2044 y vector for the matrix-vector product y = Ax. 2045 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2046 This value should be the same as the local size used in creating the 2047 x vector for the matrix-vector product y = Ax. 2048 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2049 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2050 . d_nz - number of block nonzeros per block row in diagonal portion of local 2051 submatrix (same for all local rows) 2052 . d_nnz - array containing the number of block nonzeros in the various block rows 2053 of the in diagonal portion of the local (possibly different for each block 2054 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2055 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2056 submatrix (same for all local rows). 2057 - o_nnz - array containing the number of nonzeros in the various block rows of the 2058 off-diagonal portion of the local submatrix (possibly different for 2059 each block row) or PETSC_NULL. 2060 2061 Output Parameter: 2062 . A - the matrix 2063 2064 Options Database Keys: 2065 . -mat_no_unroll - uses code that does not unroll the loops in the 2066 block calculations (much slower) 2067 . -mat_block_size - size of the blocks to use 2068 . -mat_mpi - use the parallel matrix data structures even on one processor 2069 (defaults to using SeqBAIJ format on one processor) 2070 2071 Notes: 2072 The user MUST specify either the local or global matrix dimensions 2073 (possibly both). 2074 2075 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2076 than it must be used on all processors that share the object for that argument. 
2077 2078 Storage Information: 2079 For a square global matrix we define each processor's diagonal portion 2080 to be its local rows and the corresponding columns (a square submatrix); 2081 each processor's off-diagonal portion encompasses the remainder of the 2082 local matrix (a rectangular submatrix). 2083 2084 The user can specify preallocated storage for the diagonal part of 2085 the local submatrix with either d_nz or d_nnz (not both). Set 2086 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2087 memory allocation. Likewise, specify preallocated storage for the 2088 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2089 2090 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2091 the figure below we depict these three local rows and all columns (0-11). 2092 2093 .vb 2094 0 1 2 3 4 5 6 7 8 9 10 11 2095 ------------------- 2096 row 3 | o o o d d d o o o o o o 2097 row 4 | o o o d d d o o o o o o 2098 row 5 | o o o d d d o o o o o o 2099 ------------------- 2100 .ve 2101 2102 Thus, any entries in the d locations are stored in the d (diagonal) 2103 submatrix, and any entries in the o locations are stored in the 2104 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2105 stored simply in the MATSEQBAIJ format for compressed row storage. 2106 2107 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2108 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2109 In general, for PDE problems in which most nonzeros are near the diagonal, 2110 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2111 or you will get TERRIBLE performance; see the users' manual chapter on 2112 matrices. 
2113 2114 Level: intermediate 2115 2116 .keywords: matrix, block, aij, compressed row, sparse, parallel 2117 2118 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ() 2119 @*/ 2120 2121 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2122 { 2123 Mat B; 2124 Mat_MPISBAIJ *b; 2125 int ierr,i,sum[1],work[1],mbs,nbs,Mbs=PETSC_DECIDE,size; 2126 PetscTruth flag1,flag2,flg; 2127 2128 PetscFunctionBegin; 2129 if (M != N || m != n){ /* N and n are not used after this */ 2130 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n"); 2131 } 2132 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2133 2134 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2135 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 2136 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 2137 if (d_nnz) { 2138 for (i=0; i<m/bs; i++) { 2139 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2140 } 2141 } 2142 if (o_nnz) { 2143 for (i=0; i<m/bs; i++) { 2144 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2145 } 2146 } 2147 2148 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2149 ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr); 2150 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2151 if (!flag1 && !flag2 && size == 1) { 2152 if (M == PETSC_DECIDE) M = m; 2153 ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 *A = 0; 2158 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView); 2159 PLogObjectCreate(B); 2160 B->data = (void*)(b = 
PetscNew(Mat_MPISBAIJ));CHKPTRQ(b); 2161 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 2162 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2163 2164 B->ops->destroy = MatDestroy_MPISBAIJ; 2165 B->ops->view = MatView_MPISBAIJ; 2166 B->mapping = 0; 2167 B->factor = 0; 2168 B->assembled = PETSC_FALSE; 2169 2170 B->insertmode = NOT_SET_VALUES; 2171 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2172 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2173 2174 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2175 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2176 } 2177 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2178 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2179 } 2180 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2181 2182 if (M == PETSC_DECIDE) { 2183 work[0] = m; mbs = m/bs; 2184 ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2185 M = sum[0]; Mbs = M/bs; 2186 } else { /* M is specified */ 2187 Mbs = M/bs; 2188 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2189 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2190 m = mbs*bs; 2191 } 2192 2193 if (mbs*bs != m) { 2194 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize"); 2195 } 2196 2197 b->m = m; B->m = m; 2198 b->n = m; B->n = m; 2199 b->N = M; B->N = M; 2200 b->M = M; 2201 B->M = M; 2202 b->bs = bs; 2203 b->bs2 = bs*bs; 2204 b->mbs = mbs; 2205 b->nbs = mbs; 2206 b->Mbs = Mbs; 2207 b->Nbs = Mbs; 2208 2209 /* the information in the maps duplicates the information computed below, eventually 2210 we should remove the duplicate information that is not contained in the maps */ 2211 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 2212 ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr); 2213 2214 /* build local table of row and column ownerships */ 2215 b->rowners = 
(int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2216 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2217 b->cowners = b->rowners + b->size + 2; 2218 b->rowners_bs = b->cowners + b->size + 2; 2219 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2220 b->rowners[0] = 0; 2221 for (i=2; i<=b->size; i++) { 2222 b->rowners[i] += b->rowners[i-1]; 2223 } 2224 for (i=0; i<=b->size; i++) { 2225 b->rowners_bs[i] = b->rowners[i]*bs; 2226 } 2227 b->rstart = b->rowners[b->rank]; 2228 b->rend = b->rowners[b->rank+1]; 2229 b->rstart_bs = b->rstart * bs; 2230 b->rend_bs = b->rend * bs; 2231 2232 b->cstart = b->rstart; 2233 b->cend = b->rend; 2234 b->cstart_bs = b->cstart * bs; 2235 b->cend_bs = b->cend * bs; 2236 2237 2238 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2239 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2240 PLogObjectParent(B,b->A); 2241 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2242 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2243 PLogObjectParent(B,b->B); 2244 2245 /* build cache for off array entries formed */ 2246 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2247 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2248 b->donotstash = PETSC_FALSE; 2249 b->colmap = PETSC_NULL; 2250 b->garray = PETSC_NULL; 2251 b->roworiented = PETSC_TRUE; 2252 2253 #if defined(PEYSC_USE_MAT_SINGLE) 2254 /* stuff for MatSetValues_XXX in single precision */ 2255 b->lensetvalues = 0; 2256 b->setvaluescopy = PETSC_NULL; 2257 #endif 2258 2259 /* stuff used in block assembly */ 2260 b->barray = 0; 2261 2262 /* stuff used for matrix vector multiply */ 2263 b->lvec = 0; 2264 b->Mvctx = 0; 2265 2266 /* stuff for MatGetRow() */ 2267 b->rowindices = 0; 2268 b->rowvalues = 0; 2269 b->getrowactive = PETSC_FALSE; 2270 2271 /* hash table stuff */ 2272 b->ht = 0; 2273 b->hd = 0; 2274 b->ht_size = 0; 2275 
b->ht_flag = PETSC_FALSE; 2276 b->ht_fact = 0; 2277 b->ht_total_ct = 0; 2278 b->ht_insert_ct = 0; 2279 2280 *A = B; 2281 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2282 if (flg) { 2283 double fact = 1.39; 2284 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2285 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2286 if (fact <= 1.0) fact = 1.39; 2287 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2288 PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 2289 } 2290 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2291 "MatStoreValues_MPISBAIJ", 2292 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2293 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2294 "MatRetrieveValues_MPISBAIJ", 2295 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2296 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2297 "MatGetDiagonalBlock_MPISBAIJ", 2298 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 2303 #undef __FUNC__ 2304 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ" 2305 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2306 { 2307 Mat mat; 2308 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2309 int ierr,len=0; 2310 PetscTruth flg; 2311 2312 PetscFunctionBegin; 2313 *newmat = 0; 2314 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2315 PLogObjectCreate(mat); 2316 mat->data = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a); 2317 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2318 mat->ops->destroy = MatDestroy_MPISBAIJ; 2319 mat->ops->view = MatView_MPISBAIJ; 2320 mat->factor = matin->factor; 2321 mat->assembled = PETSC_TRUE; 2322 mat->insertmode = NOT_SET_VALUES; 2323 2324 a->m = mat->m = oldmat->m; 2325 a->n = 
mat->n = oldmat->n; 2326 a->M = mat->M = oldmat->M; 2327 a->N = mat->N = oldmat->N; 2328 2329 a->bs = oldmat->bs; 2330 a->bs2 = oldmat->bs2; 2331 a->mbs = oldmat->mbs; 2332 a->nbs = oldmat->nbs; 2333 a->Mbs = oldmat->Mbs; 2334 a->Nbs = oldmat->Nbs; 2335 2336 a->rstart = oldmat->rstart; 2337 a->rend = oldmat->rend; 2338 a->cstart = oldmat->cstart; 2339 a->cend = oldmat->cend; 2340 a->size = oldmat->size; 2341 a->rank = oldmat->rank; 2342 a->donotstash = oldmat->donotstash; 2343 a->roworiented = oldmat->roworiented; 2344 a->rowindices = 0; 2345 a->rowvalues = 0; 2346 a->getrowactive = PETSC_FALSE; 2347 a->barray = 0; 2348 a->rstart_bs = oldmat->rstart_bs; 2349 a->rend_bs = oldmat->rend_bs; 2350 a->cstart_bs = oldmat->cstart_bs; 2351 a->cend_bs = oldmat->cend_bs; 2352 2353 /* hash table stuff */ 2354 a->ht = 0; 2355 a->hd = 0; 2356 a->ht_size = 0; 2357 a->ht_flag = oldmat->ht_flag; 2358 a->ht_fact = oldmat->ht_fact; 2359 a->ht_total_ct = 0; 2360 a->ht_insert_ct = 0; 2361 2362 2363 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2364 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2365 a->cowners = a->rowners + a->size + 2; 2366 a->rowners_bs = a->cowners + a->size + 2; 2367 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2368 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2369 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2370 if (oldmat->colmap) { 2371 #if defined (PETSC_USE_CTABLE) 2372 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2373 #else 2374 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2375 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2376 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2377 #endif 2378 } else a->colmap = 0; 2379 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2380 
a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2381 PLogObjectMemory(mat,len*sizeof(int)); 2382 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2383 } else a->garray = 0; 2384 2385 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2386 PLogObjectParent(mat,a->lvec); 2387 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2388 2389 PLogObjectParent(mat,a->Mvctx); 2390 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2391 PLogObjectParent(mat,a->A); 2392 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2393 PLogObjectParent(mat,a->B); 2394 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2395 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2396 if (flg) { 2397 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2398 } 2399 *newmat = mat; 2400 PetscFunctionReturn(0); 2401 } 2402 2403 #include "petscsys.h" 2404 2405 #undef __FUNC__ 2406 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ" 2407 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat) 2408 { 2409 Mat A; 2410 int i,nz,ierr,j,rstart,rend,fd; 2411 Scalar *vals,*buf; 2412 MPI_Comm comm = ((PetscObject)viewer)->comm; 2413 MPI_Status status; 2414 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2415 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2416 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2417 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2418 int dcount,kmax,k,nzcount,tmp; 2419 2420 PetscFunctionBegin; 2421 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2422 2423 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2424 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2425 if (!rank) { 2426 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2427 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2428 if (header[0] != MAT_COOKIE) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2429 if (header[3] < 0) { 2430 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ"); 2431 } 2432 } 2433 2434 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2435 M = header[1]; N = header[2]; 2436 2437 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2438 2439 /* 2440 This code adds extra rows to make sure the number of rows is 2441 divisible by the blocksize 2442 */ 2443 Mbs = M/bs; 2444 extra_rows = bs - M + bs*(Mbs); 2445 if (extra_rows == bs) extra_rows = 0; 2446 else Mbs++; 2447 if (extra_rows &&!rank) { 2448 PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2449 } 2450 2451 /* determine ownership of all rows */ 2452 mbs = Mbs/size + ((Mbs % size) > rank); 2453 m = mbs * bs; 2454 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2455 browners = rowners + size + 1; 2456 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2457 rowners[0] = 0; 2458 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2459 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2460 rstart = rowners[rank]; 2461 rend = rowners[rank+1]; 2462 2463 /* distribute row lengths to all processors */ 2464 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2465 if (!rank) { 2466 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2467 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2468 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2469 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2470 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2471 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2472 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2473 } else { 2474 ierr = 
MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2475 } 2476 2477 if (!rank) { 2478 /* calculate the number of nonzeros on each processor */ 2479 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2480 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2481 for (i=0; i<size; i++) { 2482 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2483 procsnz[i] += rowlengths[j]; 2484 } 2485 } 2486 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2487 2488 /* determine max buffer needed and allocate it */ 2489 maxnz = 0; 2490 for (i=0; i<size; i++) { 2491 maxnz = PetscMax(maxnz,procsnz[i]); 2492 } 2493 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2494 2495 /* read in my part of the matrix column indices */ 2496 nz = procsnz[0]; 2497 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2498 mycols = ibuf; 2499 if (size == 1) nz -= extra_rows; 2500 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2501 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2502 2503 /* read in every ones (except the last) and ship off */ 2504 for (i=1; i<size-1; i++) { 2505 nz = procsnz[i]; 2506 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2507 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2508 } 2509 /* read in the stuff for the last proc */ 2510 if (size != 1) { 2511 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2512 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2513 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2514 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2515 } 2516 ierr = PetscFree(cols);CHKERRQ(ierr); 2517 } else { 2518 /* determine buffer space needed for message */ 2519 nz = 0; 2520 for (i=0; i<m; i++) { 2521 nz += locrowlens[i]; 2522 } 2523 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2524 mycols = ibuf; 2525 /* receive message of column indices*/ 2526 ierr = 
MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2527 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2528 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2529 } 2530 2531 /* loop over local rows, determining number of off diagonal entries */ 2532 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2533 odlens = dlens + (rend-rstart); 2534 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2535 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2536 masked1 = mask + Mbs; 2537 masked2 = masked1 + Mbs; 2538 rowcount = 0; nzcount = 0; 2539 for (i=0; i<mbs; i++) { 2540 dcount = 0; 2541 odcount = 0; 2542 for (j=0; j<bs; j++) { 2543 kmax = locrowlens[rowcount]; 2544 for (k=0; k<kmax; k++) { 2545 tmp = mycols[nzcount++]/bs; 2546 if (!mask[tmp]) { 2547 mask[tmp] = 1; 2548 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2549 else masked1[dcount++] = tmp; 2550 } 2551 } 2552 rowcount++; 2553 } 2554 2555 dlens[i] = dcount; 2556 odlens[i] = odcount; 2557 2558 /* zero out the mask elements we set */ 2559 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2560 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2561 } 2562 2563 /* create our matrix */ 2564 ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2565 A = *newmat; 2566 MatSetOption(A,MAT_COLUMNS_SORTED); 2567 2568 if (!rank) { 2569 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2570 /* read in my part of the matrix numerical values */ 2571 nz = procsnz[0]; 2572 vals = buf; 2573 mycols = ibuf; 2574 if (size == 1) nz -= extra_rows; 2575 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2576 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2577 2578 /* insert into matrix */ 2579 jj = rstart*bs; 2580 for (i=0; i<m; i++) { 2581 ierr = 
MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2582 mycols += locrowlens[i]; 2583 vals += locrowlens[i]; 2584 jj++; 2585 } 2586 /* read in other processors (except the last one) and ship out */ 2587 for (i=1; i<size-1; i++) { 2588 nz = procsnz[i]; 2589 vals = buf; 2590 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2591 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2592 } 2593 /* the last proc */ 2594 if (size != 1){ 2595 nz = procsnz[i] - extra_rows; 2596 vals = buf; 2597 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2598 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2599 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2600 } 2601 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2602 } else { 2603 /* receive numeric values */ 2604 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2605 2606 /* receive message of values*/ 2607 vals = buf; 2608 mycols = ibuf; 2609 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2610 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2611 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2612 2613 /* insert into matrix */ 2614 jj = rstart*bs; 2615 for (i=0; i<m; i++) { 2616 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2617 mycols += locrowlens[i]; 2618 vals += locrowlens[i]; 2619 jj++; 2620 } 2621 } 2622 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2623 ierr = PetscFree(buf);CHKERRQ(ierr); 2624 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2625 ierr = PetscFree(rowners);CHKERRQ(ierr); 2626 ierr = PetscFree(dlens);CHKERRQ(ierr); 2627 ierr = PetscFree(mask);CHKERRQ(ierr); 2628 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2629 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 #undef __FUNC__ 2634 #define __FUNC__ /*<a 
name=""></a>*/"MatMPISBAIJSetHashTableFactor" 2635 /*@ 2636 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2637 2638 Input Parameters: 2639 . mat - the matrix 2640 . fact - factor 2641 2642 Collective on Mat 2643 2644 Level: advanced 2645 2646 Notes: 2647 This can also be set by the command line option: -mat_use_hash_table fact 2648 2649 .keywords: matrix, hashtable, factor, HT 2650 2651 .seealso: MatSetOption() 2652 @*/ 2653 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2654 { 2655 Mat_MPIBAIJ *baij; 2656 2657 PetscFunctionBegin; 2658 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2659 if (mat->type != MATMPIBAIJ) { 2660 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2661 } 2662 baij = (Mat_MPIBAIJ*)mat->data; 2663 baij->ht_fact = fact; 2664 PetscFunctionReturn(0); 2665 } 2666