1 /*$Id: mpibaij.c,v 1.206 2000/11/10 21:17:14 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN int DisAssemble_MPIBAIJ(Mat); 8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15 EXTERN int MatPrintHelp_SeqBAIJ(Mat); 16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*); 17 18 /* UGLY, ugly, ugly 19 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22 converts the entries into single precision and then calls ..._MatScalar() to put them 23 into the single precision data structures. 24 */ 25 #if defined(PETSC_USE_MAT_SINGLE) 26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 #else 32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 33 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 35 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 37 #endif 38 39 #undef __FUNC__ 40 #define __FUNC__ /*<a name=""></a>*/"MatGetRowMax_MPIBAIJ" 41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 42 { 43 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 44 int ierr,i; 45 Scalar *va,*vb; 46 Vec vtmp; 47 48 PetscFunctionBegin; 49 50 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 52 53 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRA(ierr); 54 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 56 57 for (i=0; i<A->m; i++){ 58 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 59 } 60 61 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 62 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63 ierr = VecDestroy(vtmp);CHKERRA(ierr); 64 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNC__ 70 #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ" 71 int MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 int ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNC__ 85 #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ" 86 int MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 int ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNC__ 105 #define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private" 106 static int CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 int nbs = B->nbs,i,bs=B->bs,ierr; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 120 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 163 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 164 new_j = (int*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 239 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 240 new_j = (int*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 \ 243 /* copy over old data into new slots */ \ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNC__ 284 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 285 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 int ierr,i,N = m*n; 289 MatScalar *vsingle; 290 291 PetscFunctionBegin; 292 if (N > b->setvalueslen) { 293 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 294 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 295 b->setvalueslen = N; 296 } 297 vsingle = b->setvaluescopy; 298 299 for (i=0; i<N; i++) { 300 vsingle[i] = v[i]; 301 } 302 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNC__ 307 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ" 308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 309 { 310 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 311 int ierr,i,N = m*n*b->bs2; 312 MatScalar *vsingle; 313 314 PetscFunctionBegin; 315 if (N > b->setvalueslen) { 316 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 317 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 318 b->setvalueslen = N; 319 } 320 vsingle = b->setvaluescopy; 321 for (i=0; i<N; i++) { 322 vsingle[i] = v[i]; 323 } 324 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNC__ 329 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT" 330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 331 { 332 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 333 int ierr,i,N = m*n; 334 MatScalar *vsingle; 335 336 PetscFunctionBegin; 337 if (N > b->setvalueslen) { 338 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 339 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 340 b->setvalueslen = N; 341 } 342 vsingle = b->setvaluescopy; 343 for (i=0; i<N; i++) { 344 vsingle[i] = v[i]; 345 } 346 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNC__ 351 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 353 { 354 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 355 int ierr,i,N = m*n*b->bs2; 356 MatScalar *vsingle; 357 358 PetscFunctionBegin; 359 if (N > b->setvalueslen) { 360 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 361 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 362 b->setvalueslen = N; 363 } 364 vsingle = b->setvaluescopy; 365 for (i=0; i<N; i++) { 366 vsingle[i] = v[i]; 367 } 368 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 #endif 372 373 #undef __FUNC__ 374 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 375 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 376 { 377 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 378 MatScalar value; 379 PetscTruth roworiented = baij->roworiented; 380 int ierr,i,j,row,col; 381 int rstart_orig=baij->rstart_bs; 382 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 383 int cend_orig=baij->cend_bs,bs=baij->bs; 384 385 /* Some Variables required in the macro */ 386 Mat A = baij->A; 387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 388 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 389 MatScalar *aa=a->a; 390 391 Mat B = baij->B; 392 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 393 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 394 MatScalar *ba=b->a; 395 396 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 397 int low,high,t,ridx,cidx,bs2=a->bs2; 398 MatScalar *ap,*bap; 399 400 PetscFunctionBegin; 401 for (i=0; i<m; i++) { 402 if (im[i] < 0) continue; 403 #if defined(PETSC_USE_BOPT_g) 404 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 405 #endif 406 if (im[i] >= rstart_orig && im[i] < rend_orig) { 407 row = im[i] - rstart_orig; 408 for (j=0; j<n; j++) { 409 if (in[j] >= cstart_orig && in[j] < cend_orig){ 410 col = in[j] - cstart_orig; 411 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 413 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 414 } else if (in[j] < 0) continue; 415 #if defined(PETSC_USE_BOPT_g) 416 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 417 #endif 418 else { 419 if (mat->was_assembled) { 420 if (!baij->colmap) { 421 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 422 } 423 #if defined (PETSC_USE_CTABLE) 424 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 425 col = col - 1 + in[j]%bs; 426 #else 427 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 428 #endif 429 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 430 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 431 col = in[j]; 432 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 433 B = baij->B; 434 b = (Mat_SeqBAIJ*)(B)->data; 435 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 436 ba=b->a; 437 } 438 } else col = in[j]; 439 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 440 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 441 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 442 } 443 } 444 } else { 445 if (!baij->donotstash) { 446 if (roworiented) { 447 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 448 } else { 449 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 450 } 451 } 452 } 453 } 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNC__ 458 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ" 459 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 460 { 461 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 462 MatScalar *value,*barray=baij->barray; 463 PetscTruth roworiented = baij->roworiented; 464 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 465 int rend=baij->rend,cstart=baij->cstart,stepval; 466 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 467 468 PetscFunctionBegin; 469 if(!barray) { 470 baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 471 } 472 473 if (roworiented) { 474 stepval = (n-1)*bs; 475 } else { 476 stepval = (m-1)*bs; 477 } 478 for (i=0; i<m; i++) { 479 if (im[i] < 0) continue; 480 #if defined(PETSC_USE_BOPT_g) 481 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 482 #endif 483 if (im[i] >= rstart && im[i] < rend) { 484 row = im[i] - rstart; 485 for (j=0; j<n; j++) { 486 /* If NumCol = 1 then a copy is not required */ 487 if ((roworiented) && (n == 1)) { 488 barray = v + i*bs2; 489 } else if((!roworiented) && (m == 1)) { 490 barray = v + j*bs2; 491 } else { /* Here a copy is required */ 492 if (roworiented) { 493 value = v + i*(stepval+bs)*bs + j*bs; 494 } else { 495 value = v + j*(stepval+bs)*bs + i*bs; 496 } 497 for (ii=0; ii<bs; ii++,value+=stepval) { 498 for (jj=0; jj<bs; jj++) { 499 *barray++ = *value++; 500 } 501 } 502 barray -=bs2; 503 } 504 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 508 } 509 else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_BOPT_g) 511 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!baij->colmap) { 516 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 519 #if defined(PETSC_USE_BOPT_g) 520 #if defined (PETSC_USE_CTABLE) 521 { int data; 522 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 523 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 524 } 525 #else 526 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 527 #endif 528 #endif 529 #if defined (PETSC_USE_CTABLE) 530 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 531 col = (col - 1)/bs; 532 #else 533 col = (baij->colmap[in[j]] - 1)/bs; 534 #endif 535 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 536 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 537 col = in[j]; 538 } 539 } 540 else col = in[j]; 541 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 542 } 543 } 544 } else { 545 if (!baij->donotstash) { 546 if (roworiented) { 547 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 548 } else { 549 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 550 } 551 } 552 } 553 } 554 PetscFunctionReturn(0); 555 } 556 557 #define HASH_KEY 0.6180339887 558 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 559 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 560 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561 #undef __FUNC__ 562 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar" 563 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 564 { 565 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 566 PetscTruth roworiented = baij->roworiented; 567 int ierr,i,j,row,col; 568 int rstart_orig=baij->rstart_bs; 569 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 570 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 571 PetscReal tmp; 572 MatScalar **HD = baij->hd,value; 573 #if defined(PETSC_USE_BOPT_g) 574 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 575 #endif 576 577 PetscFunctionBegin; 578 579 for (i=0; i<m; i++) { 580 #if defined(PETSC_USE_BOPT_g) 581 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 582 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 583 #endif 584 row = im[i]; 585 if (row >= rstart_orig && row < rend_orig) { 586 for (j=0; j<n; j++) { 587 col = in[j]; 588 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 589 /* Look up into the Hash Table */ 590 key = (row/bs)*Nbs+(col/bs)+1; 591 h1 = HASH(size,key,tmp); 592 593 594 idx = h1; 595 #if defined(PETSC_USE_BOPT_g) 596 insert_ct++; 597 total_ct++; 598 if (HT[idx] != key) { 599 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 600 if (idx == size) { 601 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 602 if (idx == h1) { 603 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 604 } 605 } 606 } 607 #else 608 if (HT[idx] != key) { 609 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 610 if (idx == size) { 611 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 612 if (idx == h1) { 613 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 614 } 615 } 616 } 617 #endif 618 /* A HASH table entry is found, so insert the values at the correct address */ 619 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 620 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 621 } 622 } else { 623 if (!baij->donotstash) { 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 #if defined(PETSC_USE_BOPT_g) 633 baij->ht_total_ct = total_ct; 634 baij->ht_insert_ct = insert_ct; 635 #endif 636 PetscFunctionReturn(0); 637 } 638 639 #undef __FUNC__ 640 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 641 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 642 { 643 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 644 PetscTruth roworiented = baij->roworiented; 645 int ierr,i,j,ii,jj,row,col; 646 int rstart=baij->rstart ; 647 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 648 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 649 PetscReal tmp; 650 MatScalar **HD = baij->hd,*baij_a; 651 MatScalar *v_t,*value; 652 #if defined(PETSC_USE_BOPT_g) 653 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 654 #endif 655 656 PetscFunctionBegin; 657 658 if (roworiented) { 659 stepval = (n-1)*bs; 660 } else { 661 stepval = (m-1)*bs; 662 } 663 for (i=0; i<m; i++) { 664 #if defined(PETSC_USE_BOPT_g) 665 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 666 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 667 #endif 668 row = im[i]; 669 v_t = v + i*bs2; 670 if (row >= rstart && row < rend) { 671 for (j=0; j<n; j++) { 672 col = in[j]; 673 674 /* Look up into the Hash Table */ 675 key = row*Nbs+col+1; 676 h1 = HASH(size,key,tmp); 677 678 idx = h1; 679 #if defined(PETSC_USE_BOPT_g) 680 total_ct++; 681 insert_ct++; 682 if (HT[idx] != key) { 683 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 684 if (idx == size) { 685 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 686 if (idx == h1) { 687 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 688 } 689 } 690 } 691 #else 692 if (HT[idx] != key) { 693 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 694 if (idx == size) { 695 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 696 if (idx == h1) { 697 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 698 } 699 } 700 } 701 #endif 702 baij_a = HD[idx]; 703 if (roworiented) { 704 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 705 /* value = v + (i*(stepval+bs)+j)*bs; */ 706 value = v_t; 707 v_t += bs; 708 if (addv == ADD_VALUES) { 709 for (ii=0; ii<bs; ii++,value+=stepval) { 710 for (jj=ii; jj<bs2; jj+=bs) { 711 baij_a[jj] += *value++; 712 } 713 } 714 } else { 715 for (ii=0; ii<bs; ii++,value+=stepval) { 716 for (jj=ii; jj<bs2; jj+=bs) { 717 baij_a[jj] = *value++; 718 } 719 } 720 } 721 } else { 722 value = v + j*(stepval+bs)*bs + i*bs; 723 if (addv == ADD_VALUES) { 724 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 725 for (jj=0; jj<bs; jj++) { 726 baij_a[jj] += *value++; 727 } 728 } 729 } else { 730 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 731 for (jj=0; jj<bs; jj++) { 732 baij_a[jj] = *value++; 733 } 734 } 735 } 736 } 737 } 738 } else { 739 if (!baij->donotstash) { 740 if (roworiented) { 741 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 742 } else { 743 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 744 } 745 } 746 } 747 } 748 #if defined(PETSC_USE_BOPT_g) 749 baij->ht_total_ct = total_ct; 750 baij->ht_insert_ct = insert_ct; 751 #endif 752 PetscFunctionReturn(0); 753 } 754 755 #undef __FUNC__ 756 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ" 757 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 758 { 759 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 760 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 761 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 762 763 PetscFunctionBegin; 764 for (i=0; i<m; i++) { 765 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 766 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 767 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 768 row = idxm[i] - bsrstart; 769 for (j=0; j<n; j++) { 770 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 771 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 772 if (idxn[j] >= bscstart && idxn[j] < bscend){ 773 col = idxn[j] - bscstart; 774 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 775 } else { 776 if (!baij->colmap) { 777 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 778 } 779 #if defined (PETSC_USE_CTABLE) 780 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 781 data --; 782 #else 783 data = baij->colmap[idxn[j]/bs]-1; 784 #endif 785 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 786 else { 787 col = data + idxn[j]%bs; 788 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 789 } 790 } 791 } 792 } else { 793 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 794 } 795 } 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNC__ 800 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ" 801 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 802 { 803 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 804 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 805 int ierr,i,bs2=baij->bs2; 806 PetscReal sum = 0.0; 807 MatScalar *v; 808 809 PetscFunctionBegin; 810 if (baij->size == 1) { 811 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 812 } else { 813 if (type == NORM_FROBENIUS) { 814 v = amat->a; 815 for (i=0; i<amat->nz*bs2; i++) { 816 #if defined(PETSC_USE_COMPLEX) 817 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 818 #else 819 sum += (*v)*(*v); v++; 820 #endif 821 } 822 v = bmat->a; 823 for (i=0; i<bmat->nz*bs2; i++) { 824 #if defined(PETSC_USE_COMPLEX) 825 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 826 #else 827 sum += (*v)*(*v); v++; 828 #endif 829 } 830 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 831 *norm = sqrt(*norm); 832 } else { 833 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 834 } 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 /* 841 Creates the hash table, and sets the table 842 This table is created only once. 843 If new entried need to be added to the matrix 844 then the hash table has to be destroyed and 845 recreated. 846 */ 847 #undef __FUNC__ 848 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 849 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 850 { 851 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 852 Mat A = baij->A,B=baij->B; 853 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 854 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 855 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 856 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 857 int *HT,key; 858 MatScalar **HD; 859 PetscReal tmp; 860 #if defined(PETSC_USE_BOPT_g) 861 int ct=0,max=0; 862 #endif 863 864 PetscFunctionBegin; 865 baij->ht_size=(int)(factor*nz); 866 size = baij->ht_size; 867 868 if (baij->ht) { 869 PetscFunctionReturn(0); 870 } 871 872 /* Allocate Memory for Hash Table */ 873 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 874 baij->ht = (int*)(baij->hd + size); 875 HD = baij->hd; 876 HT = baij->ht; 877 878 879 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 880 881 882 /* Loop Over A */ 883 for (i=0; i<a->mbs; i++) { 884 for (j=ai[i]; j<ai[i+1]; j++) { 885 row = i+rstart; 886 col = aj[j]+cstart; 887 888 key = row*Nbs + col + 1; 889 h1 = HASH(size,key,tmp); 890 for (k=0; k<size; k++){ 891 if (HT[(h1+k)%size] == 0.0) { 892 HT[(h1+k)%size] = key; 893 HD[(h1+k)%size] = a->a + j*bs2; 894 break; 895 #if defined(PETSC_USE_BOPT_g) 896 } else { 897 ct++; 898 #endif 899 } 900 } 901 #if defined(PETSC_USE_BOPT_g) 902 if (k> max) max = k; 903 #endif 904 } 905 } 906 /* Loop Over B */ 907 for (i=0; i<b->mbs; i++) { 908 for (j=bi[i]; j<bi[i+1]; j++) { 909 row = i+rstart; 910 col = garray[bj[j]]; 911 key = row*Nbs + col + 1; 912 h1 = HASH(size,key,tmp); 913 for (k=0; k<size; k++){ 914 if (HT[(h1+k)%size] == 0.0) { 915 HT[(h1+k)%size] = key; 916 HD[(h1+k)%size] = b->a + j*bs2; 917 break; 918 #if defined(PETSC_USE_BOPT_g) 919 } else { 920 ct++; 921 #endif 922 } 923 } 924 #if defined(PETSC_USE_BOPT_g) 925 if (k> max) max = k; 926 #endif 927 } 928 } 929 930 /* Print Summary */ 931 #if defined(PETSC_USE_BOPT_g) 932 for (i=0,j=0; i<size; i++) { 933 if (HT[i]) {j++;} 934 } 935 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 936 #endif 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNC__ 941 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 942 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 943 { 944 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 945 int ierr,nstash,reallocs; 946 InsertMode addv; 947 948 PetscFunctionBegin; 949 if (baij->donotstash) { 950 PetscFunctionReturn(0); 951 } 952 953 /* make sure all processors are either in INSERTMODE or ADDMODE */ 954 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 955 if (addv == (ADD_VALUES|INSERT_VALUES)) { 956 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 957 } 958 mat->insertmode = addv; /* in case this processor had no cache */ 959 960 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 961 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 962 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 963 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 964 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 965 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNC__ 970 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 971 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 972 { 973 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 974 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 975 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 976 int *row,*col,other_disassembled; 977 PetscTruth r1,r2,r3; 978 MatScalar *val; 979 InsertMode addv = mat->insertmode; 980 981 PetscFunctionBegin; 982 if (!baij->donotstash) { 983 while (1) { 984 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 985 if (!flg) break; 986 987 for (i=0; i<n;) { 988 /* Now identify the consecutive vals belonging to the same row */ 989 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 990 if (j < n) ncols = j-i; 991 else ncols = n-i; 992 /* Now assemble all these values with a single function call */ 993 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 994 i = j; 995 } 996 } 997 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 998 /* Now process the block-stash. Since the values are stashed column-oriented, 999 set the roworiented flag to column oriented, and after MatSetValues() 1000 restore the original flags */ 1001 r1 = baij->roworiented; 1002 r2 = a->roworiented; 1003 r3 = b->roworiented; 1004 baij->roworiented = PETSC_FALSE; 1005 a->roworiented = PETSC_FALSE; 1006 b->roworiented = PETSC_FALSE; 1007 while (1) { 1008 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1009 if (!flg) break; 1010 1011 for (i=0; i<n;) { 1012 /* Now identify the consecutive vals belonging to the same row */ 1013 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1014 if (j < n) ncols = j-i; 1015 else ncols = n-i; 1016 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1017 i = j; 1018 } 1019 } 1020 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1021 baij->roworiented = r1; 1022 a->roworiented = r2; 1023 b->roworiented = r3; 1024 } 1025 1026 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1027 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1028 1029 /* determine if any processor has disassembled, if so we must 1030 also disassemble ourselfs, in order that we may reassemble. */ 1031 /* 1032 if nonzero structure of submatrix B cannot change then we know that 1033 no processor disassembled thus we can skip this stuff 1034 */ 1035 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1036 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1037 if (mat->was_assembled && !other_disassembled) { 1038 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1039 } 1040 } 1041 1042 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1043 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1044 } 1045 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1046 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1047 1048 #if defined(PETSC_USE_BOPT_g) 1049 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1050 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 1051 baij->ht_total_ct = 0; 1052 baij->ht_insert_ct = 0; 1053 } 1054 #endif 1055 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1056 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1057 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1058 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1059 } 1060 1061 if (baij->rowvalues) { 1062 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1063 baij->rowvalues = 0; 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNC__ 1069 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket" 1070 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 1071 { 1072 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1073 int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 1074 PetscTruth isascii,isdraw; 1075 Viewer sviewer; 1076 1077 PetscFunctionBegin; 1078 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1079 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1080 if (isascii) { 1081 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1082 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1083 MatInfo info; 1084 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1085 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1086 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1087 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1088 baij->bs,(int)info.memory);CHKERRQ(ierr); 1089 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1090 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1091 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1092 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1093 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 1094 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1097 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 } 1101 1102 if (isdraw) { 1103 Draw draw; 1104 PetscTruth isnull; 1105 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1106 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1107 } 1108 1109 if (size == 1) { 1110 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1111 } else { 1112 /* assemble the entire matrix onto first processor. */ 1113 Mat A; 1114 Mat_SeqBAIJ *Aloc; 1115 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1116 MatScalar *a; 1117 1118 if (!rank) { 1119 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1120 } else { 1121 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1122 } 1123 PLogObjectParent(mat,A); 1124 1125 /* copy over the A part */ 1126 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1127 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1128 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1129 1130 for (i=0; i<mbs; i++) { 1131 rvals[0] = bs*(baij->rstart + i); 1132 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1133 for (j=ai[i]; j<ai[i+1]; j++) { 1134 col = (baij->cstart+aj[j])*bs; 1135 for (k=0; k<bs; k++) { 1136 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1137 col++; a += bs; 1138 } 1139 } 1140 } 1141 /* copy over the B part */ 1142 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1143 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1144 for (i=0; i<mbs; i++) { 1145 rvals[0] = bs*(baij->rstart + i); 1146 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1147 for (j=ai[i]; j<ai[i+1]; j++) { 1148 col = baij->garray[aj[j]]*bs; 1149 for (k=0; k<bs; k++) { 1150 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1151 col++; a += bs; 1152 } 1153 } 1154 } 1155 ierr = PetscFree(rvals);CHKERRQ(ierr); 1156 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1157 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1158 /* 1159 Everyone has to call to draw the matrix since the graphics waits are 1160 synchronized across all processors that share the Draw object 1161 */ 1162 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1163 if (!rank) { 1164 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1165 } 1166 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1167 ierr = MatDestroy(A);CHKERRQ(ierr); 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNC__ 1173 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ" 1174 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1175 { 1176 int ierr; 1177 PetscTruth isascii,isdraw,issocket,isbinary; 1178 1179 PetscFunctionBegin; 1180 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1181 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1182 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1183 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1184 if (isascii || isdraw || issocket || isbinary) { 1185 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1186 } else { 1187 SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1188 } 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNC__ 1193 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ" 1194 int MatDestroy_MPIBAIJ(Mat mat) 1195 { 1196 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1197 int ierr; 1198 1199 PetscFunctionBegin; 1200 #if defined(PETSC_USE_LOG) 1201 PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 1202 #endif 1203 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1204 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1205 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1206 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1207 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1208 #if defined (PETSC_USE_CTABLE) 1209 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1210 #else 1211 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1212 #endif 1213 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1214 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1215 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1216 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1217 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1218 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1219 #if defined(PETSC_USE_MAT_SINGLE) 1220 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1221 #endif 1222 ierr = PetscFree(baij);CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNC__ 1227 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1228 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1229 { 1230 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1231 int ierr,nt; 1232 1233 PetscFunctionBegin; 1234 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1235 if (nt != A->n) { 1236 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1237 } 1238 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1239 if (nt != A->m) { 1240 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1241 } 1242 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1243 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1244 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1245 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1246 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNC__ 1251 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1252 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1253 { 1254 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1255 int ierr; 1256 1257 PetscFunctionBegin; 1258 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1259 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1260 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1261 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNC__ 1266 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ" 1267 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1268 { 1269 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1270 int ierr; 1271 1272 PetscFunctionBegin; 1273 /* do nondiagonal part */ 1274 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1275 /* send it on its way */ 1276 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1277 /* do local part */ 1278 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1279 /* receive remote parts: note this assumes the values are not actually */ 1280 /* inserted in yy until the next line, which is true for my implementation*/ 1281 /* but is not perhaps always true. */ 1282 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNC__ 1287 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ" 1288 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1289 { 1290 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1291 int ierr; 1292 1293 PetscFunctionBegin; 1294 /* do nondiagonal part */ 1295 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1296 /* send it on its way */ 1297 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1298 /* do local part */ 1299 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1300 /* receive remote parts: note this assumes the values are not actually */ 1301 /* inserted in yy until the next line, which is true for my implementation*/ 1302 /* but is not perhaps always true. */ 1303 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 /* 1308 This only works correctly for square matrices where the subblock A->A is the 1309 diagonal block 1310 */ 1311 #undef __FUNC__ 1312 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ" 1313 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1314 { 1315 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1316 int ierr; 1317 1318 PetscFunctionBegin; 1319 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1320 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNC__ 1325 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ" 1326 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1327 { 1328 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329 int ierr; 1330 1331 PetscFunctionBegin; 1332 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1333 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNC__ 1338 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ" 1339 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1340 { 1341 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1342 1343 PetscFunctionBegin; 1344 if (m) *m = mat->rstart_bs; 1345 if (n) *n = mat->rend_bs; 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #undef __FUNC__ 1350 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ" 1351 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1352 { 1353 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1354 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1355 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1356 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1357 int *cmap,*idx_p,cstart = mat->cstart; 1358 1359 PetscFunctionBegin; 1360 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1361 mat->getrowactive = PETSC_TRUE; 1362 1363 if (!mat->rowvalues && (idx || v)) { 1364 /* 1365 allocate enough space to hold information from the longest row. 1366 */ 1367 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1368 int max = 1,mbs = mat->mbs,tmp; 1369 for (i=0; i<mbs; i++) { 1370 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1371 if (max < tmp) { max = tmp; } 1372 } 1373 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1374 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1375 } 1376 1377 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1378 lrow = row - brstart; 1379 1380 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1381 if (!v) {pvA = 0; pvB = 0;} 1382 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1383 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1384 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1385 nztot = nzA + nzB; 1386 1387 cmap = mat->garray; 1388 if (v || idx) { 1389 if (nztot) { 1390 /* Sort by increasing column numbers, assuming A and B already sorted */ 1391 int imark = -1; 1392 if (v) { 1393 *v = v_p = mat->rowvalues; 1394 for (i=0; i<nzB; i++) { 1395 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1396 else break; 1397 } 1398 imark = i; 1399 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1400 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1401 } 1402 if (idx) { 1403 *idx = idx_p = mat->rowindices; 1404 if (imark > -1) { 1405 for (i=0; i<imark; i++) { 1406 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1407 } 1408 } else { 1409 for (i=0; i<nzB; i++) { 1410 if (cmap[cworkB[i]/bs] < cstart) 1411 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1412 else break; 1413 } 1414 imark = i; 1415 } 1416 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1417 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1418 } 1419 } else { 1420 if (idx) *idx = 0; 1421 if (v) *v = 0; 1422 } 1423 } 1424 *nz = nztot; 1425 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1426 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNC__ 1431 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ" 1432 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1433 { 1434 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1435 1436 PetscFunctionBegin; 1437 if (baij->getrowactive == PETSC_FALSE) { 1438 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1439 } 1440 baij->getrowactive = PETSC_FALSE; 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNC__ 1445 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ" 1446 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1447 { 1448 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1449 1450 PetscFunctionBegin; 1451 *bs = baij->bs; 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNC__ 1456 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ" 1457 int MatZeroEntries_MPIBAIJ(Mat A) 1458 { 1459 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1460 int ierr; 1461 1462 PetscFunctionBegin; 1463 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1464 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNC__ 1469 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ" 1470 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1471 { 1472 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1473 Mat A = a->A,B = a->B; 1474 int ierr; 1475 PetscReal isend[5],irecv[5]; 1476 1477 PetscFunctionBegin; 1478 info->block_size = (double)a->bs; 1479 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1480 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1481 isend[3] = info->memory; isend[4] = info->mallocs; 1482 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1483 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1484 isend[3] += info->memory; isend[4] += info->mallocs; 1485 if (flag == MAT_LOCAL) { 1486 info->nz_used = isend[0]; 1487 info->nz_allocated = isend[1]; 1488 info->nz_unneeded = isend[2]; 1489 info->memory = isend[3]; 1490 info->mallocs = isend[4]; 1491 } else if (flag == MAT_GLOBAL_MAX) { 1492 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1493 info->nz_used = irecv[0]; 1494 info->nz_allocated = irecv[1]; 1495 info->nz_unneeded = irecv[2]; 1496 info->memory = irecv[3]; 1497 info->mallocs = irecv[4]; 1498 } else if (flag == MAT_GLOBAL_SUM) { 1499 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1500 info->nz_used = irecv[0]; 1501 info->nz_allocated = irecv[1]; 1502 info->nz_unneeded = irecv[2]; 1503 info->memory = irecv[3]; 1504 info->mallocs = irecv[4]; 1505 } else { 1506 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1507 } 1508 info->rows_global = (double)A->M; 1509 info->columns_global = (double)A->N; 1510 info->rows_local = (double)A->m; 1511 info->columns_local = (double)A->N; 1512 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1513 info->fill_ratio_needed = 0; 1514 info->factor_mallocs = 0; 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNC__ 1519 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1520 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1521 { 1522 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1523 int ierr; 1524 1525 PetscFunctionBegin; 1526 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1527 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1528 op == MAT_COLUMNS_UNSORTED || 1529 op == MAT_COLUMNS_SORTED || 1530 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1531 op == MAT_KEEP_ZEROED_ROWS || 1532 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1533 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1534 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1535 } else if (op == MAT_ROW_ORIENTED) { 1536 a->roworiented = PETSC_TRUE; 1537 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1538 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1539 } else if (op == MAT_ROWS_SORTED || 1540 op == MAT_ROWS_UNSORTED || 1541 op == MAT_SYMMETRIC || 1542 op == MAT_STRUCTURALLY_SYMMETRIC || 1543 op == MAT_YES_NEW_DIAGONALS || 1544 op == MAT_USE_HASH_TABLE) { 1545 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1546 } else if (op == MAT_COLUMN_ORIENTED) { 1547 a->roworiented = PETSC_FALSE; 1548 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1549 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1550 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1551 a->donotstash = PETSC_TRUE; 1552 } else if (op == MAT_NO_NEW_DIAGONALS) { 1553 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1554 } else if (op == MAT_USE_HASH_TABLE) { 1555 a->ht_flag = PETSC_TRUE; 1556 } else { 1557 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1558 } 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNC__ 1563 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1564 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1565 { 1566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1567 Mat_SeqBAIJ *Aloc; 1568 Mat B; 1569 int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1570 int bs=baij->bs,mbs=baij->mbs; 1571 MatScalar *a; 1572 1573 PetscFunctionBegin; 1574 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1575 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1576 1577 /* copy over the A part */ 1578 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1579 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1580 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1581 1582 for (i=0; i<mbs; i++) { 1583 rvals[0] = bs*(baij->rstart + i); 1584 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1585 for (j=ai[i]; j<ai[i+1]; j++) { 1586 col = (baij->cstart+aj[j])*bs; 1587 for (k=0; k<bs; k++) { 1588 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1589 col++; a += bs; 1590 } 1591 } 1592 } 1593 /* copy over the B part */ 1594 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1595 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1596 for (i=0; i<mbs; i++) { 1597 rvals[0] = bs*(baij->rstart + i); 1598 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1599 for (j=ai[i]; j<ai[i+1]; j++) { 1600 col = baij->garray[aj[j]]*bs; 1601 for (k=0; k<bs; k++) { 1602 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1603 col++; a += bs; 1604 } 1605 } 1606 } 1607 ierr = PetscFree(rvals);CHKERRQ(ierr); 1608 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1609 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1610 1611 if (matout) { 1612 *matout = B; 1613 } else { 1614 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1615 } 1616 PetscFunctionReturn(0); 1617 } 1618 1619 #undef __FUNC__ 1620 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ" 1621 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1622 { 1623 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1624 Mat a = baij->A,b = baij->B; 1625 int ierr,s1,s2,s3; 1626 1627 PetscFunctionBegin; 1628 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1629 if (rr) { 1630 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1631 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1632 /* Overlap communication with computation. */ 1633 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1634 } 1635 if (ll) { 1636 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1637 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1638 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1639 } 1640 /* scale the diagonal block */ 1641 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1642 1643 if (rr) { 1644 /* Do a scatter end and then right scale the off-diagonal block */ 1645 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1646 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1647 } 1648 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNC__ 1653 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ" 1654 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1655 { 1656 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1657 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1658 int *procs,*nprocs,j,idx,nsends,*work,row; 1659 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1660 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1661 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1662 MPI_Comm comm = A->comm; 1663 MPI_Request *send_waits,*recv_waits; 1664 MPI_Status recv_status,*send_status; 1665 IS istmp; 1666 PetscTruth found; 1667 1668 PetscFunctionBegin; 1669 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1670 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1671 1672 /* first count number of contributors to each processor */ 1673 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1674 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1675 procs = nprocs + size; 1676 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1677 for (i=0; i<N; i++) { 1678 idx = rows[i]; 1679 found = PETSC_FALSE; 1680 for (j=0; j<size; j++) { 1681 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1682 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 1683 } 1684 } 1685 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1686 } 1687 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1688 1689 /* inform other processors of number of messages and max length*/ 1690 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1691 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1692 nmax = work[rank]; 1693 nrecvs = work[size+rank]; 1694 ierr = PetscFree(work);CHKERRQ(ierr); 1695 1696 /* post receives: */ 1697 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1698 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1699 for (i=0; i<nrecvs; i++) { 1700 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1701 } 1702 1703 /* do sends: 1704 1) starts[i] gives the starting index in svalues for stuff going to 1705 the ith processor 1706 */ 1707 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1708 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1709 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1710 starts[0] = 0; 1711 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1712 for (i=0; i<N; i++) { 1713 svalues[starts[owner[i]]++] = rows[i]; 1714 } 1715 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1716 1717 starts[0] = 0; 1718 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1719 count = 0; 1720 for (i=0; i<size; i++) { 1721 if (procs[i]) { 1722 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1723 } 1724 } 1725 ierr = PetscFree(starts);CHKERRQ(ierr); 1726 1727 base = owners[rank]*bs; 1728 1729 /* wait on receives */ 1730 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1731 source = lens + nrecvs; 1732 count = nrecvs; slen = 0; 1733 while (count) { 1734 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1735 /* unpack receives into our local space */ 1736 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1737 source[imdex] = recv_status.MPI_SOURCE; 1738 lens[imdex] = n; 1739 slen += n; 1740 count--; 1741 } 1742 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1743 1744 /* move the data into the send scatter */ 1745 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1746 count = 0; 1747 for (i=0; i<nrecvs; i++) { 1748 values = rvalues + i*nmax; 1749 for (j=0; j<lens[i]; j++) { 1750 lrows[count++] = values[j] - base; 1751 } 1752 } 1753 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1754 ierr = PetscFree(lens);CHKERRQ(ierr); 1755 ierr = PetscFree(owner);CHKERRQ(ierr); 1756 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1757 1758 /* actually zap the local rows */ 1759 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1760 PLogObjectParent(A,istmp); 1761 1762 /* 1763 Zero the required rows. If the "diagonal block" of the matrix 1764 is square and the user wishes to set the diagonal we use seperate 1765 code so that MatSetValues() is not called for each diagonal allocating 1766 new memory, thus calling lots of mallocs and slowing things down. 1767 1768 Contributed by: Mathew Knepley 1769 */ 1770 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1771 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1772 if (diag && (l->A->M == l->A->N)) { 1773 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1774 } else if (diag) { 1775 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1776 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1777 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1778 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1779 } 1780 for (i=0; i<slen; i++) { 1781 row = lrows[i] + rstart_bs; 1782 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1783 } 1784 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1785 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1786 } else { 1787 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1788 } 1789 1790 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1791 ierr = PetscFree(lrows);CHKERRQ(ierr); 1792 1793 /* wait on sends */ 1794 if (nsends) { 1795 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1796 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1797 ierr = PetscFree(send_status);CHKERRQ(ierr); 1798 } 1799 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1800 ierr = PetscFree(svalues);CHKERRQ(ierr); 1801 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNC__ 1806 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ" 1807 int MatPrintHelp_MPIBAIJ(Mat A) 1808 { 1809 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1810 MPI_Comm comm = A->comm; 1811 static int called = 0; 1812 int ierr; 1813 1814 PetscFunctionBegin; 1815 if (!a->rank) { 1816 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1817 } 1818 if (called) {PetscFunctionReturn(0);} else called = 1; 1819 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1820 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNC__ 1825 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1826 int MatSetUnfactored_MPIBAIJ(Mat A) 1827 { 1828 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1829 int ierr; 1830 1831 PetscFunctionBegin; 1832 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1833 PetscFunctionReturn(0); 1834 } 1835 1836 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1837 1838 #undef __FUNC__ 1839 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 1840 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1841 { 1842 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1843 Mat a,b,c,d; 1844 PetscTruth flg; 1845 int ierr; 1846 1847 PetscFunctionBegin; 1848 ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr); 1849 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1850 a = matA->A; b = matA->B; 1851 c = matB->A; d = matB->B; 1852 1853 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1854 if (flg == PETSC_TRUE) { 1855 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1856 } 1857 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 1862 #undef __FUNC__ 1863 #define __FUNC__ /*<a name="MatSetUpPreallocation_MPIBAIJ"></a>*/"MatSetUpPreallocation_MPIBAIJ" 1864 int MatSetUpPreallocation_MPIBAIJ(Mat A) 1865 { 1866 int ierr; 1867 1868 PetscFunctionBegin; 1869 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1870 PetscFunctionReturn(0); 1871 } 1872 1873 /* -------------------------------------------------------------------*/ 1874 static struct _MatOps MatOps_Values = { 1875 MatSetValues_MPIBAIJ, 1876 MatGetRow_MPIBAIJ, 1877 MatRestoreRow_MPIBAIJ, 1878 MatMult_MPIBAIJ, 1879 MatMultAdd_MPIBAIJ, 1880 MatMultTranspose_MPIBAIJ, 1881 MatMultTransposeAdd_MPIBAIJ, 1882 0, 1883 0, 1884 0, 1885 0, 1886 0, 1887 0, 1888 0, 1889 MatTranspose_MPIBAIJ, 1890 MatGetInfo_MPIBAIJ, 1891 MatEqual_MPIBAIJ, 1892 MatGetDiagonal_MPIBAIJ, 1893 MatDiagonalScale_MPIBAIJ, 1894 MatNorm_MPIBAIJ, 1895 MatAssemblyBegin_MPIBAIJ, 1896 MatAssemblyEnd_MPIBAIJ, 1897 0, 1898 MatSetOption_MPIBAIJ, 1899 MatZeroEntries_MPIBAIJ, 1900 MatZeroRows_MPIBAIJ, 1901 0, 1902 0, 1903 0, 1904 0, 1905 MatSetUpPreallocation_MPIBAIJ, 1906 0, 1907 MatGetOwnershipRange_MPIBAIJ, 1908 0, 1909 0, 1910 0, 1911 0, 1912 MatDuplicate_MPIBAIJ, 1913 0, 1914 0, 1915 0, 1916 0, 1917 0, 1918 MatGetSubMatrices_MPIBAIJ, 1919 MatIncreaseOverlap_MPIBAIJ, 1920 MatGetValues_MPIBAIJ, 1921 0, 1922 MatPrintHelp_MPIBAIJ, 1923 MatScale_MPIBAIJ, 1924 0, 1925 0, 1926 0, 1927 MatGetBlockSize_MPIBAIJ, 1928 0, 1929 0, 1930 0, 1931 0, 1932 0, 1933 0, 1934 MatSetUnfactored_MPIBAIJ, 1935 0, 1936 MatSetValuesBlocked_MPIBAIJ, 1937 0, 1938 MatDestroy_MPIBAIJ, 1939 MatView_MPIBAIJ, 1940 MatGetMaps_Petsc, 1941 0, 1942 0, 1943 0, 1944 0, 1945 0, 1946 0, 1947 MatGetRowMax_MPIBAIJ}; 1948 1949 1950 EXTERN_C_BEGIN 1951 #undef __FUNC__ 1952 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 1953 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1954 { 1955 PetscFunctionBegin; 1956 *a = ((Mat_MPIBAIJ *)A->data)->A; 1957 *iscopy = PETSC_FALSE; 1958 PetscFunctionReturn(0); 1959 } 1960 EXTERN_C_END 1961 1962 EXTERN_C_BEGIN 1963 #undef __FUNC__ 1964 #define __FUNC__ /*<a name=""></a>*/"MatCreate_MPIBAIJ" 1965 int MatCreate_MPIBAIJ(Mat B) 1966 { 1967 Mat_MPIBAIJ *b; 1968 int ierr,size; 1969 PetscTruth flg; 1970 1971 PetscFunctionBegin; 1972 1973 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1974 B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 1975 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 1976 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1977 B->mapping = 0; 1978 B->factor = 0; 1979 B->assembled = PETSC_FALSE; 1980 1981 B->insertmode = NOT_SET_VALUES; 1982 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1983 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1984 1985 /* build local table of row and column ownerships */ 1986 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1987 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1988 b->cowners = b->rowners + b->size + 2; 1989 b->rowners_bs = b->cowners + b->size + 2; 1990 1991 /* build cache for off array entries formed */ 1992 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1993 b->donotstash = PETSC_FALSE; 1994 b->colmap = PETSC_NULL; 1995 b->garray = PETSC_NULL; 1996 b->roworiented = PETSC_TRUE; 1997 1998 #if defined(PEYSC_USE_MAT_SINGLE) 1999 /* stuff for MatSetValues_XXX in single precision */ 2000 b->lensetvalues = 0; 2001 b->setvaluescopy = PETSC_NULL; 2002 #endif 2003 2004 /* stuff used in block assembly */ 2005 b->barray = 0; 2006 2007 /* stuff used for matrix vector multiply */ 2008 b->lvec = 0; 2009 b->Mvctx = 0; 2010 2011 /* stuff for MatGetRow() */ 2012 b->rowindices = 0; 2013 b->rowvalues = 0; 2014 b->getrowactive = PETSC_FALSE; 2015 2016 /* hash table stuff */ 2017 b->ht = 0; 2018 b->hd = 0; 2019 b->ht_size = 0; 2020 b->ht_flag = PETSC_FALSE; 2021 b->ht_fact = 0; 2022 b->ht_total_ct = 0; 2023 b->ht_insert_ct = 0; 2024 2025 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2026 if (flg) { 2027 double fact = 1.39; 2028 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2029 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2030 if (fact <= 1.0) fact = 1.39; 2031 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2032 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2033 } 2034 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2035 "MatStoreValues_MPIBAIJ", 2036 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2037 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2038 "MatRetrieveValues_MPIBAIJ", 2039 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2040 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2041 "MatGetDiagonalBlock_MPIBAIJ", 2042 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2043 PetscFunctionReturn(0); 2044 } 2045 EXTERN_C_END 2046 2047 #undef __FUNC__ 2048 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetPreallocation" 2049 /*@C 2050 MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format 2051 (block compressed row). For good matrix assembly performance 2052 the user should preallocate the matrix storage by setting the parameters 2053 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2054 performance can be increased by more than a factor of 50. 2055 2056 Collective on Mat 2057 2058 Input Parameters: 2059 + A - the matrix 2060 . bs - size of blockk 2061 . d_nz - number of block nonzeros per block row in diagonal portion of local 2062 submatrix (same for all local rows) 2063 . d_nnz - array containing the number of block nonzeros in the various block rows 2064 of the in diagonal portion of the local (possibly different for each block 2065 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2066 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2067 submatrix (same for all local rows). 2068 - o_nnz - array containing the number of nonzeros in the various block rows of the 2069 off-diagonal portion of the local submatrix (possibly different for 2070 each block row) or PETSC_NULL. 2071 2072 Output Parameter: 2073 2074 2075 Options Database Keys: 2076 . -mat_no_unroll - uses code that does not unroll the loops in the 2077 block calculations (much slower) 2078 . -mat_block_size - size of the blocks to use 2079 2080 Notes: 2081 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2082 than it must be used on all processors that share the object for that argument. 2083 2084 Storage Information: 2085 For a square global matrix we define each processor's diagonal portion 2086 to be its local rows and the corresponding columns (a square submatrix); 2087 each processor's off-diagonal portion encompasses the remainder of the 2088 local matrix (a rectangular submatrix). 2089 2090 The user can specify preallocated storage for the diagonal part of 2091 the local submatrix with either d_nz or d_nnz (not both). Set 2092 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2093 memory allocation. Likewise, specify preallocated storage for the 2094 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2095 2096 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2097 the figure below we depict these three local rows and all columns (0-11). 2098 2099 .vb 2100 0 1 2 3 4 5 6 7 8 9 10 11 2101 ------------------- 2102 row 3 | o o o d d d o o o o o o 2103 row 4 | o o o d d d o o o o o o 2104 row 5 | o o o d d d o o o o o o 2105 ------------------- 2106 .ve 2107 2108 Thus, any entries in the d locations are stored in the d (diagonal) 2109 submatrix, and any entries in the o locations are stored in the 2110 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2111 stored simply in the MATSEQBAIJ format for compressed row storage. 2112 2113 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2114 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2115 In general, for PDE problems in which most nonzeros are near the diagonal, 2116 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2117 or you will get TERRIBLE performance; see the users' manual chapter on 2118 matrices. 2119 2120 Level: intermediate 2121 2122 .keywords: matrix, block, aij, compressed row, sparse, parallel 2123 2124 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2125 @*/ 2126 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2127 { 2128 Mat_MPIBAIJ *b; 2129 int ierr,i; 2130 PetscTruth flg2; 2131 2132 PetscFunctionBegin; 2133 ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 2134 if (!flg2) PetscFunctionReturn(0); 2135 2136 B->preallocated = PETSC_TRUE; 2137 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2138 2139 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2140 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -1: value %d",d_nz); 2141 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -1: value %d",o_nz); 2142 if (d_nnz) { 2143 for (i=0; i<B->m/bs; i++) { 2144 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2145 } 2146 } 2147 if (o_nnz) { 2148 for (i=0; i<B->m/bs; i++) { 2149 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2150 } 2151 } 2152 2153 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2154 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2155 ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2156 ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2157 2158 b = (Mat_MPIBAIJ*)B->data; 2159 b->bs = bs; 2160 b->bs2 = bs*bs; 2161 b->mbs = B->m/bs; 2162 b->nbs = B->n/bs; 2163 b->Mbs = B->M/bs; 2164 b->Nbs = B->N/bs; 2165 2166 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2167 b->rowners[0] = 0; 2168 for (i=2; i<=b->size; i++) { 2169 b->rowners[i] += b->rowners[i-1]; 2170 } 2171 b->rstart = b->rowners[b->rank]; 2172 b->rend = b->rowners[b->rank+1]; 2173 2174 ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 2175 b->cowners[0] = 0; 2176 for (i=2; i<=b->size; i++) { 2177 b->cowners[i] += b->cowners[i-1]; 2178 } 2179 b->cstart = b->cowners[b->rank]; 2180 b->cend = b->cowners[b->rank+1]; 2181 2182 for (i=0; i<=b->size; i++) { 2183 b->rowners_bs[i] = b->rowners[i]*bs; 2184 } 2185 b->rstart_bs = b->rstart*bs; 2186 b->rend_bs = b->rend*bs; 2187 b->cstart_bs = b->cstart*bs; 2188 b->cend_bs = b->cend*bs; 2189 2190 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2191 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2192 PLogObjectParent(B,b->A); 2193 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2194 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2195 PLogObjectParent(B,b->B); 2196 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2197 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNC__ 2202 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 2203 /*@C 2204 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2205 (block compressed row). For good matrix assembly performance 2206 the user should preallocate the matrix storage by setting the parameters 2207 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2208 performance can be increased by more than a factor of 50. 2209 2210 Collective on MPI_Comm 2211 2212 Input Parameters: 2213 + comm - MPI communicator 2214 . bs - size of blockk 2215 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2216 This value should be the same as the local size used in creating the 2217 y vector for the matrix-vector product y = Ax. 2218 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2219 This value should be the same as the local size used in creating the 2220 x vector for the matrix-vector product y = Ax. 2221 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2222 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2223 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2224 submatrix (same for all local rows) 2225 . d_nnz - array containing the number of nonzero blocks in the various block rows 2226 of the in diagonal portion of the local (possibly different for each block 2227 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2228 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2229 submatrix (same for all local rows). 2230 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2231 off-diagonal portion of the local submatrix (possibly different for 2232 each block row) or PETSC_NULL. 2233 2234 Output Parameter: 2235 . A - the matrix 2236 2237 Options Database Keys: 2238 . -mat_no_unroll - uses code that does not unroll the loops in the 2239 block calculations (much slower) 2240 . -mat_block_size - size of the blocks to use 2241 2242 Notes: 2243 A nonzero block is any block that as 1 or more nonzeros in it 2244 2245 The user MUST specify either the local or global matrix dimensions 2246 (possibly both). 2247 2248 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2249 than it must be used on all processors that share the object for that argument. 2250 2251 Storage Information: 2252 For a square global matrix we define each processor's diagonal portion 2253 to be its local rows and the corresponding columns (a square submatrix); 2254 each processor's off-diagonal portion encompasses the remainder of the 2255 local matrix (a rectangular submatrix). 2256 2257 The user can specify preallocated storage for the diagonal part of 2258 the local submatrix with either d_nz or d_nnz (not both). Set 2259 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2260 memory allocation. Likewise, specify preallocated storage for the 2261 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2262 2263 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2264 the figure below we depict these three local rows and all columns (0-11). 2265 2266 .vb 2267 0 1 2 3 4 5 6 7 8 9 10 11 2268 ------------------- 2269 row 3 | o o o d d d o o o o o o 2270 row 4 | o o o d d d o o o o o o 2271 row 5 | o o o d d d o o o o o o 2272 ------------------- 2273 .ve 2274 2275 Thus, any entries in the d locations are stored in the d (diagonal) 2276 submatrix, and any entries in the o locations are stored in the 2277 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2278 stored simply in the MATSEQBAIJ format for compressed row storage. 2279 2280 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2281 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2282 In general, for PDE problems in which most nonzeros are near the diagonal, 2283 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2284 or you will get TERRIBLE performance; see the users' manual chapter on 2285 matrices. 2286 2287 Level: intermediate 2288 2289 .keywords: matrix, block, aij, compressed row, sparse, parallel 2290 2291 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2292 @*/ 2293 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2294 { 2295 int ierr,size; 2296 2297 PetscFunctionBegin; 2298 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2299 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2300 if (size > 1) { 2301 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2302 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2303 } else { 2304 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2305 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2306 } 2307 PetscFunctionReturn(0); 2308 } 2309 2310 #undef __FUNC__ 2311 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 2312 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2313 { 2314 Mat mat; 2315 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2316 int ierr,len=0; 2317 2318 PetscFunctionBegin; 2319 *newmat = 0; 2320 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2321 ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr); 2322 mat->preallocated = PETSC_TRUE; 2323 mat->assembled = PETSC_TRUE; 2324 a = (Mat_MPIBAIJ*)mat->data; 2325 a->bs = oldmat->bs; 2326 a->bs2 = oldmat->bs2; 2327 a->mbs = oldmat->mbs; 2328 a->nbs = oldmat->nbs; 2329 a->Mbs = oldmat->Mbs; 2330 a->Nbs = oldmat->Nbs; 2331 2332 a->rstart = oldmat->rstart; 2333 a->rend = oldmat->rend; 2334 a->cstart = oldmat->cstart; 2335 a->cend = oldmat->cend; 2336 a->size = oldmat->size; 2337 a->rank = oldmat->rank; 2338 a->donotstash = oldmat->donotstash; 2339 a->roworiented = oldmat->roworiented; 2340 a->rowindices = 0; 2341 a->rowvalues = 0; 2342 a->getrowactive = PETSC_FALSE; 2343 a->barray = 0; 2344 a->rstart_bs = oldmat->rstart_bs; 2345 a->rend_bs = oldmat->rend_bs; 2346 a->cstart_bs = oldmat->cstart_bs; 2347 a->cend_bs = oldmat->cend_bs; 2348 2349 /* hash table stuff */ 2350 a->ht = 0; 2351 a->hd = 0; 2352 a->ht_size = 0; 2353 a->ht_flag = oldmat->ht_flag; 2354 a->ht_fact = oldmat->ht_fact; 2355 a->ht_total_ct = 0; 2356 a->ht_insert_ct = 0; 2357 2358 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2359 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2360 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2361 if (oldmat->colmap) { 2362 #if defined (PETSC_USE_CTABLE) 2363 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2364 #else 2365 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2366 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2367 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2368 #endif 2369 } else a->colmap = 0; 2370 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2371 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2372 PLogObjectMemory(mat,len*sizeof(int)); 2373 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2374 } else a->garray = 0; 2375 2376 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2377 PLogObjectParent(mat,a->lvec); 2378 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2379 2380 PLogObjectParent(mat,a->Mvctx); 2381 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2382 PLogObjectParent(mat,a->A); 2383 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2384 PLogObjectParent(mat,a->B); 2385 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2386 *newmat = mat; 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #include "petscsys.h" 2391 2392 EXTERN_C_BEGIN 2393 #undef __FUNC__ 2394 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 2395 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2396 { 2397 Mat A; 2398 int i,nz,ierr,j,rstart,rend,fd; 2399 Scalar *vals,*buf; 2400 MPI_Comm comm = ((PetscObject)viewer)->comm; 2401 MPI_Status status; 2402 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2403 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2404 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2405 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2406 int dcount,kmax,k,nzcount,tmp; 2407 2408 PetscFunctionBegin; 2409 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2410 2411 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2412 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2413 if (!rank) { 2414 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2415 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2416 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2417 if (header[3] < 0) { 2418 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2419 } 2420 } 2421 2422 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2423 M = header[1]; N = header[2]; 2424 2425 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2426 2427 /* 2428 This code adds extra rows to make sure the number of rows is 2429 divisible by the blocksize 2430 */ 2431 Mbs = M/bs; 2432 extra_rows = bs - M + bs*(Mbs); 2433 if (extra_rows == bs) extra_rows = 0; 2434 else Mbs++; 2435 if (extra_rows &&!rank) { 2436 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2437 } 2438 2439 /* determine ownership of all rows */ 2440 mbs = Mbs/size + ((Mbs % size) > rank); 2441 m = mbs*bs; 2442 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2443 browners = rowners + size + 1; 2444 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2445 rowners[0] = 0; 2446 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2447 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2448 rstart = rowners[rank]; 2449 rend = rowners[rank+1]; 2450 2451 /* distribute row lengths to all processors */ 2452 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2453 if (!rank) { 2454 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2455 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2456 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2457 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2458 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2459 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2460 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2461 } else { 2462 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2463 } 2464 2465 if (!rank) { 2466 /* calculate the number of nonzeros on each processor */ 2467 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2468 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2469 for (i=0; i<size; i++) { 2470 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2471 procsnz[i] += rowlengths[j]; 2472 } 2473 } 2474 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2475 2476 /* determine max buffer needed and allocate it */ 2477 maxnz = 0; 2478 for (i=0; i<size; i++) { 2479 maxnz = PetscMax(maxnz,procsnz[i]); 2480 } 2481 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2482 2483 /* read in my part of the matrix column indices */ 2484 nz = procsnz[0]; 2485 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2486 mycols = ibuf; 2487 if (size == 1) nz -= extra_rows; 2488 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2489 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2490 2491 /* read in every ones (except the last) and ship off */ 2492 for (i=1; i<size-1; i++) { 2493 nz = procsnz[i]; 2494 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2495 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2496 } 2497 /* read in the stuff for the last proc */ 2498 if (size != 1) { 2499 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2500 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2501 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2502 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2503 } 2504 ierr = PetscFree(cols);CHKERRQ(ierr); 2505 } else { 2506 /* determine buffer space needed for message */ 2507 nz = 0; 2508 for (i=0; i<m; i++) { 2509 nz += locrowlens[i]; 2510 } 2511 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2512 mycols = ibuf; 2513 /* receive message of column indices*/ 2514 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2515 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2516 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2517 } 2518 2519 /* loop over local rows, determining number of off diagonal entries */ 2520 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2521 odlens = dlens + (rend-rstart); 2522 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2523 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2524 masked1 = mask + Mbs; 2525 masked2 = masked1 + Mbs; 2526 rowcount = 0; nzcount = 0; 2527 for (i=0; i<mbs; i++) { 2528 dcount = 0; 2529 odcount = 0; 2530 for (j=0; j<bs; j++) { 2531 kmax = locrowlens[rowcount]; 2532 for (k=0; k<kmax; k++) { 2533 tmp = mycols[nzcount++]/bs; 2534 if (!mask[tmp]) { 2535 mask[tmp] = 1; 2536 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2537 else masked1[dcount++] = tmp; 2538 } 2539 } 2540 rowcount++; 2541 } 2542 2543 dlens[i] = dcount; 2544 odlens[i] = odcount; 2545 2546 /* zero out the mask elements we set */ 2547 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2548 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2549 } 2550 2551 /* create our matrix */ 2552 ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2553 A = *newmat; 2554 MatSetOption(A,MAT_COLUMNS_SORTED); 2555 2556 if (!rank) { 2557 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2558 /* read in my part of the matrix numerical values */ 2559 nz = procsnz[0]; 2560 vals = buf; 2561 mycols = ibuf; 2562 if (size == 1) nz -= extra_rows; 2563 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2564 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2565 2566 /* insert into matrix */ 2567 jj = rstart*bs; 2568 for (i=0; i<m; i++) { 2569 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2570 mycols += locrowlens[i]; 2571 vals += locrowlens[i]; 2572 jj++; 2573 } 2574 /* read in other processors (except the last one) and ship out */ 2575 for (i=1; i<size-1; i++) { 2576 nz = procsnz[i]; 2577 vals = buf; 2578 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2579 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2580 } 2581 /* the last proc */ 2582 if (size != 1){ 2583 nz = procsnz[i] - extra_rows; 2584 vals = buf; 2585 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2586 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2587 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2588 } 2589 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2590 } else { 2591 /* receive numeric values */ 2592 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2593 2594 /* receive message of values*/ 2595 vals = buf; 2596 mycols = ibuf; 2597 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2598 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2599 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2600 2601 /* insert into matrix */ 2602 jj = rstart*bs; 2603 for (i=0; i<m; i++) { 2604 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2605 mycols += locrowlens[i]; 2606 vals += locrowlens[i]; 2607 jj++; 2608 } 2609 } 2610 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2611 ierr = PetscFree(buf);CHKERRQ(ierr); 2612 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2613 ierr = PetscFree(rowners);CHKERRQ(ierr); 2614 ierr = PetscFree(dlens);CHKERRQ(ierr); 2615 ierr = PetscFree(mask);CHKERRQ(ierr); 2616 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2617 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2618 PetscFunctionReturn(0); 2619 } 2620 EXTERN_C_END 2621 2622 #undef __FUNC__ 2623 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2624 /*@ 2625 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2626 2627 Input Parameters: 2628 . mat - the matrix 2629 . fact - factor 2630 2631 Collective on Mat 2632 2633 Level: advanced 2634 2635 Notes: 2636 This can also be set by the command line option: -mat_use_hash_table fact 2637 2638 .keywords: matrix, hashtable, factor, HT 2639 2640 .seealso: MatSetOption() 2641 @*/ 2642 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2643 { 2644 Mat_MPIBAIJ *baij; 2645 int ierr; 2646 PetscTruth flg; 2647 2648 PetscFunctionBegin; 2649 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2650 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr); 2651 if (!flg) { 2652 SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only."); 2653 } 2654 baij = (Mat_MPIBAIJ*)mat->data; 2655 baij->ht_fact = fact; 2656 PetscFunctionReturn(0); 2657 } 2658