1 /*$Id: mpibaij.c,v 1.203 2000/09/28 21:11:32 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN int DisAssemble_MPIBAIJ(Mat); 8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15 EXTERN int MatPrintHelp_SeqBAIJ(Mat); 16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*); 17 18 /* UGLY, ugly, ugly 19 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22 converts the entries into single precision and then calls ..._MatScalar() to put them 23 into the single precision data structures. 
24 */ 25 #if defined(PETSC_USE_MAT_SINGLE) 26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 #else 32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 33 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 35 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 37 #endif 38 39 #undef __FUNC__ 40 #define __FUNC__ /*<a name=""></a>*/"MatGetRowMax_MPIBAIJ" 41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 42 { 43 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 44 int ierr,i; 45 PetscReal *va,*vb; 46 Vec vtmp; 47 48 PetscFunctionBegin; 49 50 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51 ierr = VecGetArry(v,&va);CHKERRQ(ierr); 52 53 ierr = VecCreateSeq(PETSC_COMM_SELF,a->m,&vtmp);CHKERRA(ierr); 54 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55 ierr = VecGetArry(vtmp,&vb);CHKERRQ(ierr); 56 57 for (i=0; i<a->m; i++){ 58 if (va[i] < vb[i]) va[i] = vb[i]; 59 } 60 61 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 62 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63 ierr = VecDestroy(vtmp);CHKERRA(ierr); 64 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNC__ 70 #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ" 71 int MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 int ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = 
MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNC__ 85 #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ" 86 int MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 int ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNC__ 105 #define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private" 106 static int CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 int nbs = B->nbs,i,bs=B->bs,ierr; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 120 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while 
(high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 163 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 164 new_j = (int*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = 
a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 239 new_a = 
(MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 240 new_j = (int*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 \ 243 /* copy over old data into new slots */ \ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNC__ 284 #define __FUNC__ /*<a 
name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 285 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 int ierr,i,N = m*n; 289 MatScalar *vsingle; 290 291 PetscFunctionBegin; 292 if (N > b->setvalueslen) { 293 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 294 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 295 b->setvalueslen = N; 296 } 297 vsingle = b->setvaluescopy; 298 299 for (i=0; i<N; i++) { 300 vsingle[i] = v[i]; 301 } 302 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNC__ 307 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ" 308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 309 { 310 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 311 int ierr,i,N = m*n*b->bs2; 312 MatScalar *vsingle; 313 314 PetscFunctionBegin; 315 if (N > b->setvalueslen) { 316 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 317 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 318 b->setvalueslen = N; 319 } 320 vsingle = b->setvaluescopy; 321 for (i=0; i<N; i++) { 322 vsingle[i] = v[i]; 323 } 324 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNC__ 329 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT" 330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 331 { 332 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 333 int ierr,i,N = m*n; 334 MatScalar *vsingle; 335 336 PetscFunctionBegin; 337 if (N > b->setvalueslen) { 338 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 339 b->setvaluescopy = 
(MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 340 b->setvalueslen = N; 341 } 342 vsingle = b->setvaluescopy; 343 for (i=0; i<N; i++) { 344 vsingle[i] = v[i]; 345 } 346 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNC__ 351 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 353 { 354 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 355 int ierr,i,N = m*n*b->bs2; 356 MatScalar *vsingle; 357 358 PetscFunctionBegin; 359 if (N > b->setvalueslen) { 360 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 361 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 362 b->setvalueslen = N; 363 } 364 vsingle = b->setvaluescopy; 365 for (i=0; i<N; i++) { 366 vsingle[i] = v[i]; 367 } 368 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 #endif 372 373 #undef __FUNC__ 374 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 375 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 376 { 377 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 378 MatScalar value; 379 int ierr,i,j,row,col; 380 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 381 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 382 int cend_orig=baij->cend_bs,bs=baij->bs; 383 384 /* Some Variables required in the macro */ 385 Mat A = baij->A; 386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 387 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 388 MatScalar *aa=a->a; 389 390 Mat B = baij->B; 391 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 392 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 393 MatScalar *ba=b->a; 394 395 int 
*rp,ii,nrow,_i,rmax,N,brow,bcol; 396 int low,high,t,ridx,cidx,bs2=a->bs2; 397 MatScalar *ap,*bap; 398 399 PetscFunctionBegin; 400 for (i=0; i<m; i++) { 401 if (im[i] < 0) continue; 402 #if defined(PETSC_USE_BOPT_g) 403 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 404 #endif 405 if (im[i] >= rstart_orig && im[i] < rend_orig) { 406 row = im[i] - rstart_orig; 407 for (j=0; j<n; j++) { 408 if (in[j] >= cstart_orig && in[j] < cend_orig){ 409 col = in[j] - cstart_orig; 410 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 411 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 412 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 413 } else if (in[j] < 0) continue; 414 #if defined(PETSC_USE_BOPT_g) 415 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 416 #endif 417 else { 418 if (mat->was_assembled) { 419 if (!baij->colmap) { 420 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 421 } 422 #if defined (PETSC_USE_CTABLE) 423 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 424 col = col - 1 + in[j]%bs; 425 #else 426 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 427 #endif 428 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 429 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 430 col = in[j]; 431 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 432 B = baij->B; 433 b = (Mat_SeqBAIJ*)(B)->data; 434 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 435 ba=b->a; 436 } 437 } else col = in[j]; 438 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 439 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 440 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 441 } 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 447 } else { 448 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNC__ 457 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ" 458 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 459 { 460 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 461 MatScalar *value,*barray=baij->barray; 462 int ierr,i,j,ii,jj,row,col; 463 int roworiented = baij->roworiented,rstart=baij->rstart ; 464 int rend=baij->rend,cstart=baij->cstart,stepval; 465 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 466 467 PetscFunctionBegin; 468 if(!barray) { 469 baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 470 } 471 472 if (roworiented) { 473 stepval = (n-1)*bs; 474 } else { 475 stepval = (m-1)*bs; 476 } 477 for (i=0; i<m; i++) { 478 if (im[i] < 0) continue; 479 #if defined(PETSC_USE_BOPT_g) 480 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 481 #endif 482 if (im[i] >= rstart && im[i] < rend) { 483 row = im[i] - rstart; 484 for (j=0; j<n; j++) { 485 /* If NumCol = 1 then a copy is not required */ 486 if ((roworiented) && (n == 1)) { 487 barray = v + i*bs2; 488 } else if((!roworiented) && (m == 1)) { 489 barray = v + j*bs2; 490 } else { /* Here a copy is required */ 491 if (roworiented) { 492 value = v + i*(stepval+bs)*bs + j*bs; 493 } else { 494 value = v + j*(stepval+bs)*bs + i*bs; 495 } 496 for (ii=0; ii<bs; ii++,value+=stepval) { 497 for (jj=0; jj<bs; jj++) { 498 *barray++ = *value++; 499 } 500 } 501 barray -=bs2; 502 } 503 504 if (in[j] >= cstart && in[j] < cend){ 505 col = in[j] - cstart; 506 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 507 } 508 else if (in[j] < 0) continue; 509 #if defined(PETSC_USE_BOPT_g) 510 else if (in[j] >= baij->Nbs) 
{SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 511 #endif 512 else { 513 if (mat->was_assembled) { 514 if (!baij->colmap) { 515 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 516 } 517 518 #if defined(PETSC_USE_BOPT_g) 519 #if defined (PETSC_USE_CTABLE) 520 { int data; 521 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 522 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 523 } 524 #else 525 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 526 #endif 527 #endif 528 #if defined (PETSC_USE_CTABLE) 529 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 530 col = (col - 1)/bs; 531 #else 532 col = (baij->colmap[in[j]] - 1)/bs; 533 #endif 534 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 535 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 536 col = in[j]; 537 } 538 } 539 else col = in[j]; 540 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 541 } 542 } 543 } else { 544 if (!baij->donotstash) { 545 if (roworiented) { 546 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 547 } else { 548 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } 550 } 551 } 552 } 553 PetscFunctionReturn(0); 554 } 555 556 #define HASH_KEY 0.6180339887 557 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 558 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 559 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 560 #undef __FUNC__ 561 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar" 562 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 563 { 564 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 565 int ierr,i,j,row,col; 566 int roworiented = 
baij->roworiented,rstart_orig=baij->rstart_bs ; 567 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 568 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 569 PetscReal tmp; 570 MatScalar **HD = baij->hd,value; 571 #if defined(PETSC_USE_BOPT_g) 572 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 573 #endif 574 575 PetscFunctionBegin; 576 577 for (i=0; i<m; i++) { 578 #if defined(PETSC_USE_BOPT_g) 579 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 580 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 581 #endif 582 row = im[i]; 583 if (row >= rstart_orig && row < rend_orig) { 584 for (j=0; j<n; j++) { 585 col = in[j]; 586 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 587 /* Look up into the Hash Table */ 588 key = (row/bs)*Nbs+(col/bs)+1; 589 h1 = HASH(size,key,tmp); 590 591 592 idx = h1; 593 #if defined(PETSC_USE_BOPT_g) 594 insert_ct++; 595 total_ct++; 596 if (HT[idx] != key) { 597 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 598 if (idx == size) { 599 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 600 if (idx == h1) { 601 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 602 } 603 } 604 } 605 #else 606 if (HT[idx] != key) { 607 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 608 if (idx == size) { 609 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 610 if (idx == h1) { 611 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 612 } 613 } 614 } 615 #endif 616 /* A HASH table entry is found, so insert the values at the correct address */ 617 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 618 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 619 } 620 } else { 621 if (!baij->donotstash) { 622 if (roworiented) { 623 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 624 } else { 625 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 
626 } 627 } 628 } 629 } 630 #if defined(PETSC_USE_BOPT_g) 631 baij->ht_total_ct = total_ct; 632 baij->ht_insert_ct = insert_ct; 633 #endif 634 PetscFunctionReturn(0); 635 } 636 637 #undef __FUNC__ 638 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 639 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 640 { 641 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 642 int ierr,i,j,ii,jj,row,col; 643 int roworiented = baij->roworiented,rstart=baij->rstart ; 644 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 645 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 646 PetscReal tmp; 647 MatScalar **HD = baij->hd,*baij_a; 648 MatScalar *v_t,*value; 649 #if defined(PETSC_USE_BOPT_g) 650 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 651 #endif 652 653 PetscFunctionBegin; 654 655 if (roworiented) { 656 stepval = (n-1)*bs; 657 } else { 658 stepval = (m-1)*bs; 659 } 660 for (i=0; i<m; i++) { 661 #if defined(PETSC_USE_BOPT_g) 662 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 663 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 664 #endif 665 row = im[i]; 666 v_t = v + i*bs2; 667 if (row >= rstart && row < rend) { 668 for (j=0; j<n; j++) { 669 col = in[j]; 670 671 /* Look up into the Hash Table */ 672 key = row*Nbs+col+1; 673 h1 = HASH(size,key,tmp); 674 675 idx = h1; 676 #if defined(PETSC_USE_BOPT_g) 677 total_ct++; 678 insert_ct++; 679 if (HT[idx] != key) { 680 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 681 if (idx == size) { 682 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 683 if (idx == h1) { 684 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 685 } 686 } 687 } 688 #else 689 if (HT[idx] != key) { 690 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 691 if (idx == size) { 692 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 693 if (idx == h1) { 694 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 695 } 696 } 697 } 698 #endif 699 baij_a = HD[idx]; 700 if (roworiented) { 701 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 702 /* value = v + (i*(stepval+bs)+j)*bs; */ 703 value = v_t; 704 v_t += bs; 705 if (addv == ADD_VALUES) { 706 for (ii=0; ii<bs; ii++,value+=stepval) { 707 for (jj=ii; jj<bs2; jj+=bs) { 708 baij_a[jj] += *value++; 709 } 710 } 711 } else { 712 for (ii=0; ii<bs; ii++,value+=stepval) { 713 for (jj=ii; jj<bs2; jj+=bs) { 714 baij_a[jj] = *value++; 715 } 716 } 717 } 718 } else { 719 value = v + j*(stepval+bs)*bs + i*bs; 720 if (addv == ADD_VALUES) { 721 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 722 for (jj=0; jj<bs; jj++) { 723 baij_a[jj] += *value++; 724 } 725 } 726 } else { 727 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 728 for (jj=0; jj<bs; jj++) { 729 baij_a[jj] = *value++; 730 } 731 } 732 } 733 } 734 } 735 } else { 736 if (!baij->donotstash) { 737 if (roworiented) { 738 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 739 } else { 740 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 741 } 742 } 743 } 744 } 745 #if defined(PETSC_USE_BOPT_g) 746 baij->ht_total_ct = total_ct; 747 baij->ht_insert_ct = insert_ct; 748 #endif 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNC__ 753 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ" 754 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 755 { 756 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 757 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 758 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 759 760 PetscFunctionBegin; 761 for (i=0; i<m; i++) { 762 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 763 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 764 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 765 row 
= idxm[i] - bsrstart; 766 for (j=0; j<n; j++) { 767 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 768 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 769 if (idxn[j] >= bscstart && idxn[j] < bscend){ 770 col = idxn[j] - bscstart; 771 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 772 } else { 773 if (!baij->colmap) { 774 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 775 } 776 #if defined (PETSC_USE_CTABLE) 777 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 778 data --; 779 #else 780 data = baij->colmap[idxn[j]/bs]-1; 781 #endif 782 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 783 else { 784 col = data + idxn[j]%bs; 785 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786 } 787 } 788 } 789 } else { 790 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 791 } 792 } 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNC__ 797 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ" 798 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm) 799 { 800 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 801 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 802 int ierr,i,bs2=baij->bs2; 803 PetscReal sum = 0.0; 804 MatScalar *v; 805 806 PetscFunctionBegin; 807 if (baij->size == 1) { 808 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 809 } else { 810 if (type == NORM_FROBENIUS) { 811 v = amat->a; 812 for (i=0; i<amat->nz*bs2; i++) { 813 #if defined(PETSC_USE_COMPLEX) 814 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 815 #else 816 sum += (*v)*(*v); v++; 817 #endif 818 } 819 v = bmat->a; 820 for (i=0; i<bmat->nz*bs2; i++) { 821 #if defined(PETSC_USE_COMPLEX) 822 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 823 #else 824 sum += (*v)*(*v); v++; 825 #endif 826 } 827 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 828 *norm = 
sqrt(*norm); 829 } else { 830 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 831 } 832 } 833 PetscFunctionReturn(0); 834 } 835 836 837 /* 838 Creates the hash table, and sets the table 839 This table is created only once. 840 If new entried need to be added to the matrix 841 then the hash table has to be destroyed and 842 recreated. 843 */ 844 #undef __FUNC__ 845 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 846 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 847 { 848 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 849 Mat A = baij->A,B=baij->B; 850 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 851 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 852 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 853 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 854 int *HT,key; 855 MatScalar **HD; 856 PetscReal tmp; 857 #if defined(PETSC_USE_BOPT_g) 858 int ct=0,max=0; 859 #endif 860 861 PetscFunctionBegin; 862 baij->ht_size=(int)(factor*nz); 863 size = baij->ht_size; 864 865 if (baij->ht) { 866 PetscFunctionReturn(0); 867 } 868 869 /* Allocate Memory for Hash Table */ 870 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 871 baij->ht = (int*)(baij->hd + size); 872 HD = baij->hd; 873 HT = baij->ht; 874 875 876 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 877 878 879 /* Loop Over A */ 880 for (i=0; i<a->mbs; i++) { 881 for (j=ai[i]; j<ai[i+1]; j++) { 882 row = i+rstart; 883 col = aj[j]+cstart; 884 885 key = row*Nbs + col + 1; 886 h1 = HASH(size,key,tmp); 887 for (k=0; k<size; k++){ 888 if (HT[(h1+k)%size] == 0.0) { 889 HT[(h1+k)%size] = key; 890 HD[(h1+k)%size] = a->a + j*bs2; 891 break; 892 #if defined(PETSC_USE_BOPT_g) 893 } else { 894 ct++; 895 #endif 896 } 897 } 898 #if defined(PETSC_USE_BOPT_g) 899 if (k> max) max = k; 900 #endif 901 } 902 } 903 /* Loop Over B */ 904 for (i=0; i<b->mbs; i++) { 
905 for (j=bi[i]; j<bi[i+1]; j++) { 906 row = i+rstart; 907 col = garray[bj[j]]; 908 key = row*Nbs + col + 1; 909 h1 = HASH(size,key,tmp); 910 for (k=0; k<size; k++){ 911 if (HT[(h1+k)%size] == 0.0) { 912 HT[(h1+k)%size] = key; 913 HD[(h1+k)%size] = b->a + j*bs2; 914 break; 915 #if defined(PETSC_USE_BOPT_g) 916 } else { 917 ct++; 918 #endif 919 } 920 } 921 #if defined(PETSC_USE_BOPT_g) 922 if (k> max) max = k; 923 #endif 924 } 925 } 926 927 /* Print Summary */ 928 #if defined(PETSC_USE_BOPT_g) 929 for (i=0,j=0; i<size; i++) { 930 if (HT[i]) {j++;} 931 } 932 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 933 #endif 934 PetscFunctionReturn(0); 935 } 936 937 #undef __FUNC__ 938 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 939 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 940 { 941 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 942 int ierr,nstash,reallocs; 943 InsertMode addv; 944 945 PetscFunctionBegin; 946 if (baij->donotstash) { 947 PetscFunctionReturn(0); 948 } 949 950 /* make sure all processors are either in INSERTMODE or ADDMODE */ 951 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 952 if (addv == (ADD_VALUES|INSERT_VALUES)) { 953 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 954 } 955 mat->insertmode = addv; /* in case this processor had no cache */ 956 957 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 958 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 959 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 960 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 961 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 962 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly: drains the point and block
   stashes (inserting the received off-processor values locally), assembles
   the on-diagonal (A) and off-diagonal (B) sequential blocks, coordinates
   disassembly across processors, and optionally installs the hash-table
   fast path for MatSetValues().
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash; each message holds (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* block values are bs2 scalars per (row,col) pair, hence val+i*bs2 */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD: result is 0 (false) iff ANY processor was not assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* NOTE(review): divides by ht_insert_ct without a zero check — debug-only,
     and only reached when baij->ht is set; confirm ht_insert_ct > 0 then */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* first final assembly with the hash-table option on: build the table and
     switch the ops table to the hash-table SetValues variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow; invalidated by assembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Viewer kernel: prints per-process
   info for the info formats, otherwise gathers the whole matrix onto
   process 0 and views it there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth  isascii,isdraw;
  Viewer      sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-process storage statistics; nz counts are in points, hence *bs */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* rank 0 owns the whole gathered matrix; everyone else owns 0 rows */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global (point) row indices of block row i */
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* local block-column -> global point column via cstart offset */
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* off-diagonal block columns map to global via garray */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
/*
   MatView_MPIBAIJ - Top-level viewer dispatch: ASCII, draw, socket and
   binary viewers are all routed to the common kernel above; any other
   viewer type is an error.
*/
int MatView_MPIBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Releases all storage owned by the parallel matrix:
   mappings, maps, stashes, the two sequential blocks, the column map, the
   scatter context and assorted scratch arrays.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if
(baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1222 #else 1223 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1224 #endif 1225 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1226 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1227 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1228 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1229 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1230 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1231 #if defined(PETSC_USE_MAT_SINGLE) 1232 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1233 #endif 1234 ierr = PetscFree(baij);CHKERRQ(ierr); 1235 PLogObjectDestroy(mat); 1236 PetscHeaderDestroy(mat); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNC__ 1241 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1242 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1243 { 1244 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1245 int ierr,nt; 1246 1247 PetscFunctionBegin; 1248 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1249 if (nt != a->n) { 1250 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1251 } 1252 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1253 if (nt != a->m) { 1254 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1255 } 1256 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1257 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1258 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1259 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1260 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNC__ 1265 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1266 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1267 { 1268 
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
/*
   MatMultTranspose_MPIBAIJ - yy = A^T*xx.  The off-diagonal block's
   transpose contribution is computed into a->lvec and scattered back
   (reverse, additive) while the local transpose multiply proceeds.
*/
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
/*
   MatMultTransposeAdd_MPIBAIJ - zz = yy + A^T*xx, same communication
   pattern as MatMultTranspose but accumulating into zz.
*/
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
/*
   MatScale_MPIBAIJ - Scales the entire matrix by *aa, by scaling both
   the on- and off-diagonal sequential blocks.
*/
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
/*
   MatGetOwnershipRange_MPIBAIJ - Returns the first and one-past-last
   global point row owned locally (block ranges scaled by the block size).
*/
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart*mat->bs;
  if (n) *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns the (point) column indices and values of one
   locally owned global row, merging the rows of the A and B blocks into
   globally increasing column order.  Only one row may be active at a time.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  Scalar      *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* widest combined block row, in blocks */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the values and the index scratch arrays */
    mat->rowvalues  = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the A block */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
/* MatRestoreRow_MPIBAIJ - Releases the row obtained with MatGetRow. */
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
/* MatGetBlockSize_MPIBAIJ - Returns the block size bs. */
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
/* MatZeroEntries_MPIBAIJ - Zeros both sequential blocks, keeping the nonzero structure. */
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
/*
   MatGetInfo_MPIBAIJ - Sums the statistics of the A and B blocks, then
   (for the global flags) reduces them across the communicator.
*/
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (double)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag ==
MAT_LOCAL) { 1500 info->nz_used = isend[0]; 1501 info->nz_allocated = isend[1]; 1502 info->nz_unneeded = isend[2]; 1503 info->memory = isend[3]; 1504 info->mallocs = isend[4]; 1505 } else if (flag == MAT_GLOBAL_MAX) { 1506 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1507 info->nz_used = irecv[0]; 1508 info->nz_allocated = irecv[1]; 1509 info->nz_unneeded = irecv[2]; 1510 info->memory = irecv[3]; 1511 info->mallocs = irecv[4]; 1512 } else if (flag == MAT_GLOBAL_SUM) { 1513 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1514 info->nz_used = irecv[0]; 1515 info->nz_allocated = irecv[1]; 1516 info->nz_unneeded = irecv[2]; 1517 info->memory = irecv[3]; 1518 info->mallocs = irecv[4]; 1519 } else { 1520 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1521 } 1522 info->rows_global = (double)a->M; 1523 info->columns_global = (double)a->N; 1524 info->rows_local = (double)a->m; 1525 info->columns_local = (double)a->N; 1526 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1527 info->fill_ratio_needed = 0; 1528 info->factor_mallocs = 0; 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNC__ 1533 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1534 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1535 { 1536 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1537 int ierr; 1538 1539 PetscFunctionBegin; 1540 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1541 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1542 op == MAT_COLUMNS_UNSORTED || 1543 op == MAT_COLUMNS_SORTED || 1544 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1545 op == MAT_KEEP_ZEROED_ROWS || 1546 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1547 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1548 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1549 } else if (op == MAT_ROW_ORIENTED) { 1550 a->roworiented = PETSC_TRUE; 1551 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1552 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1553 } else if (op == MAT_ROWS_SORTED || 
1554 op == MAT_ROWS_UNSORTED || 1555 op == MAT_SYMMETRIC || 1556 op == MAT_STRUCTURALLY_SYMMETRIC || 1557 op == MAT_YES_NEW_DIAGONALS || 1558 op == MAT_USE_HASH_TABLE) { 1559 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1560 } else if (op == MAT_COLUMN_ORIENTED) { 1561 a->roworiented = PETSC_FALSE; 1562 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1563 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1564 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1565 a->donotstash = PETSC_TRUE; 1566 } else if (op == MAT_NO_NEW_DIAGONALS) { 1567 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1568 } else if (op == MAT_USE_HASH_TABLE) { 1569 a->ht_flag = PETSC_TRUE; 1570 } else { 1571 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1572 } 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #undef __FUNC__ 1577 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1578 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1579 { 1580 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1581 Mat_SeqBAIJ *Aloc; 1582 Mat B; 1583 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1584 int bs=baij->bs,mbs=baij->mbs; 1585 MatScalar *a; 1586 1587 PetscFunctionBegin; 1588 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1589 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1590 1591 /* copy over the A part */ 1592 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1593 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1594 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1595 1596 for (i=0; i<mbs; i++) { 1597 rvals[0] = bs*(baij->rstart + i); 1598 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1599 for (j=ai[i]; j<ai[i+1]; j++) { 1600 col = (baij->cstart+aj[j])*bs; 1601 for (k=0; k<bs; k++) { 1602 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1603 col++; a += bs; 1604 } 1605 } 1606 } 1607 /* copy over the B part */ 1608 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1609 ai = Aloc->i; aj = 
Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* off-diagonal block columns map to global columns through garray */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose .... but free data structures from baij */
    ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(baij->A);CHKERRQ(ierr);
    ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
    if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
    if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
    if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
    if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(baij);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    /* overwrite A's header with B's, then restore A's original op tables */
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes diag(ll)*mat*diag(rr).  The right
   scaling of the off-diagonal block needs off-processor entries of rr,
   so that scatter is overlapped with the diagonal-block scaling.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a = baij->A,b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (point) rows listed in is, optionally
   setting the diagonal entries to *diag.  Row indices may refer to rows
   owned by other processors, so the list is first redistributed to the
   owning processors with a send/receive rendezvous.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int
*lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    /* rowners stores block-row ranges; *bs converts to point rows */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];        /* longest message this process will receive */
  nrecvs = work[size+rank];   /* number of messages this process will receive */
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts =
(int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination processor */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* recompute starts (the bucketing above advanced them) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;   /* first locally owned point row */

  /* wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert to local row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows.
If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: SeqBAIJ can set the diagonal entries itself */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular case: zero first, then insert the diagonal entries */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;   /* back to global point row */
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
/*
   MatPrintHelp_MPIBAIJ - Prints the runtime options understood by the
   MPIBAIJ format (once per run; rank 0 also prints the SeqBAIJ help).
*/
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;   /* guards against printing the help repeatedly */
1853 int ierr; 1854 1855 PetscFunctionBegin; 1856 if (!a->rank) { 1857 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1858 } 1859 if (called) {PetscFunctionReturn(0);} else called = 1; 1860 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1861 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1862 PetscFunctionReturn(0); 1863 } 1864 1865 #undef __FUNC__ 1866 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1867 int MatSetUnfactored_MPIBAIJ(Mat A) 1868 { 1869 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1870 int ierr; 1871 1872 PetscFunctionBegin; 1873 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 1877 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1878 1879 #undef __FUNC__ 1880 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 1881 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1882 { 1883 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1884 Mat a,b,c,d; 1885 PetscTruth flg; 1886 int ierr; 1887 1888 PetscFunctionBegin; 1889 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1890 a = matA->A; b = matA->B; 1891 c = matB->A; d = matB->B; 1892 1893 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1894 if (flg == PETSC_TRUE) { 1895 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1896 } 1897 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1898 PetscFunctionReturn(0); 1899 } 1900 1901 /* -------------------------------------------------------------------*/ 1902 static struct _MatOps MatOps_Values = { 1903 MatSetValues_MPIBAIJ, 1904 MatGetRow_MPIBAIJ, 1905 MatRestoreRow_MPIBAIJ, 1906 MatMult_MPIBAIJ, 1907 MatMultAdd_MPIBAIJ, 1908 MatMultTranspose_MPIBAIJ, 1909 MatMultTransposeAdd_MPIBAIJ, 1910 0, 1911 0, 1912 0, 1913 0, 1914 0, 1915 0, 1916 0, 1917 MatTranspose_MPIBAIJ, 1918 MatGetInfo_MPIBAIJ, 
1919 MatEqual_MPIBAIJ, 1920 MatGetDiagonal_MPIBAIJ, 1921 MatDiagonalScale_MPIBAIJ, 1922 MatNorm_MPIBAIJ, 1923 MatAssemblyBegin_MPIBAIJ, 1924 MatAssemblyEnd_MPIBAIJ, 1925 0, 1926 MatSetOption_MPIBAIJ, 1927 MatZeroEntries_MPIBAIJ, 1928 MatZeroRows_MPIBAIJ, 1929 0, 1930 0, 1931 0, 1932 0, 1933 MatGetOwnershipRange_MPIBAIJ, 1934 0, 1935 0, 1936 0, 1937 0, 1938 MatDuplicate_MPIBAIJ, 1939 0, 1940 0, 1941 0, 1942 0, 1943 0, 1944 MatGetSubMatrices_MPIBAIJ, 1945 MatIncreaseOverlap_MPIBAIJ, 1946 MatGetValues_MPIBAIJ, 1947 0, 1948 MatPrintHelp_MPIBAIJ, 1949 MatScale_MPIBAIJ, 1950 0, 1951 0, 1952 0, 1953 MatGetBlockSize_MPIBAIJ, 1954 0, 1955 0, 1956 0, 1957 0, 1958 0, 1959 0, 1960 MatSetUnfactored_MPIBAIJ, 1961 0, 1962 MatSetValuesBlocked_MPIBAIJ, 1963 0, 1964 MatDestroy_MPIBAIJ, 1965 MatView_MPIBAIJ, 1966 MatGetMaps_Petsc, 1967 0, 1968 0, 1969 0, 1970 0, 1971 0, 1972 0, 1973 MatGetRowMax_MPIBAIJ}; 1974 1975 1976 EXTERN_C_BEGIN 1977 #undef __FUNC__ 1978 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 1979 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1980 { 1981 PetscFunctionBegin; 1982 *a = ((Mat_MPIBAIJ *)A->data)->A; 1983 *iscopy = PETSC_FALSE; 1984 PetscFunctionReturn(0); 1985 } 1986 EXTERN_C_END 1987 1988 #undef __FUNC__ 1989 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 1990 /*@C 1991 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1992 (block compressed row). For good matrix assembly performance 1993 the user should preallocate the matrix storage by setting the parameters 1994 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1995 performance can be increased by more than a factor of 50. 1996 1997 Collective on MPI_Comm 1998 1999 Input Parameters: 2000 + comm - MPI communicator 2001 . bs - size of blockk 2002 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2003 This value should be the same as the local size used in creating the 2004 y vector for the matrix-vector product y = Ax. 2005 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2006 This value should be the same as the local size used in creating the 2007 x vector for the matrix-vector product y = Ax. 2008 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2009 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2010 . d_nz - number of block nonzeros per block row in diagonal portion of local 2011 submatrix (same for all local rows) 2012 . d_nnz - array containing the number of block nonzeros in the various block rows 2013 of the in diagonal portion of the local (possibly different for each block 2014 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2015 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2016 submatrix (same for all local rows). 2017 - o_nnz - array containing the number of nonzeros in the various block rows of the 2018 off-diagonal portion of the local submatrix (possibly different for 2019 each block row) or PETSC_NULL. 2020 2021 Output Parameter: 2022 . A - the matrix 2023 2024 Options Database Keys: 2025 . -mat_no_unroll - uses code that does not unroll the loops in the 2026 block calculations (much slower) 2027 . -mat_block_size - size of the blocks to use 2028 . -mat_mpi - use the parallel matrix data structures even on one processor 2029 (defaults to using SeqBAIJ format on one processor) 2030 2031 Notes: 2032 The user MUST specify either the local or global matrix dimensions 2033 (possibly both). 2034 2035 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2036 than it must be used on all processors that share the object for that argument. 
2037 2038 Storage Information: 2039 For a square global matrix we define each processor's diagonal portion 2040 to be its local rows and the corresponding columns (a square submatrix); 2041 each processor's off-diagonal portion encompasses the remainder of the 2042 local matrix (a rectangular submatrix). 2043 2044 The user can specify preallocated storage for the diagonal part of 2045 the local submatrix with either d_nz or d_nnz (not both). Set 2046 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2047 memory allocation. Likewise, specify preallocated storage for the 2048 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2049 2050 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2051 the figure below we depict these three local rows and all columns (0-11). 2052 2053 .vb 2054 0 1 2 3 4 5 6 7 8 9 10 11 2055 ------------------- 2056 row 3 | o o o d d d o o o o o o 2057 row 4 | o o o d d d o o o o o o 2058 row 5 | o o o d d d o o o o o o 2059 ------------------- 2060 .ve 2061 2062 Thus, any entries in the d locations are stored in the d (diagonal) 2063 submatrix, and any entries in the o locations are stored in the 2064 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2065 stored simply in the MATSEQBAIJ format for compressed row storage. 2066 2067 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2068 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2069 In general, for PDE problems in which most nonzeros are near the diagonal, 2070 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2071 or you will get TERRIBLE performance; see the users' manual chapter on 2072 matrices. 
2073 2074 Level: intermediate 2075 2076 .keywords: matrix, block, aij, compressed row, sparse, parallel 2077 2078 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2079 @*/ 2080 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2081 { 2082 Mat B; 2083 Mat_MPIBAIJ *b; 2084 int ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 2085 PetscTruth flag1,flag2,flg; 2086 2087 PetscFunctionBegin; 2088 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2089 2090 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2091 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -1: value %d",d_nz); 2092 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -1: value %d",o_nz); 2093 if (d_nnz) { 2094 for (i=0; i<m/bs; i++) { 2095 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2096 } 2097 } 2098 if (o_nnz) { 2099 for (i=0; i<m/bs; i++) { 2100 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2101 } 2102 } 2103 2104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2105 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2106 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2107 if (!flag1 && !flag2 && size == 1) { 2108 if (M == PETSC_DECIDE) M = m; 2109 if (N == PETSC_DECIDE) N = n; 2110 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 *A = 0; 2115 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2116 PLogObjectCreate(B); 2117 B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2118 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2119 ierr = 
PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2120 B->mapping = 0; 2121 B->factor = 0; 2122 B->assembled = PETSC_FALSE; 2123 2124 B->insertmode = NOT_SET_VALUES; 2125 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2126 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2127 2128 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2129 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2130 } 2131 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2132 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either M or m should be specified"); 2133 } 2134 if (N == PETSC_DECIDE && n == PETSC_DECIDE) { 2135 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either N or n should be specified"); 2136 } 2137 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2138 if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2139 2140 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2141 work[0] = m; work[1] = n; 2142 mbs = m/bs; nbs = n/bs; 2143 ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2144 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2145 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2146 } 2147 if (m == PETSC_DECIDE) { 2148 Mbs = M/bs; 2149 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global rows must be divisible by blocksize"); 2150 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2151 m = mbs*bs; 2152 } 2153 if (n == PETSC_DECIDE) { 2154 Nbs = N/bs; 2155 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global cols must be divisible by blocksize"); 2156 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2157 n = nbs*bs; 2158 } 2159 if (mbs*bs != m || nbs*bs != n) { 2160 SETERRQ(PETSC_ERR_ARG_SIZ,"No of local rows, cols must be divisible by blocksize"); 2161 } 2162 2163 b->m = m; B->m = m; 2164 b->n = n; B->n = n; 2165 b->N = N; B->N = N; 2166 b->M = M; B->M = M; 2167 b->bs = bs; 2168 b->bs2 = bs*bs; 2169 b->mbs = mbs; 2170 b->nbs = nbs; 2171 b->Mbs = Mbs; 2172 b->Nbs = Nbs; 2173 2174 /* the information 
in the maps duplicates the information computed below, eventually 2175 we should remove the duplicate information that is not contained in the maps */ 2176 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 2177 ierr = MapCreateMPI(B->comm,n,N,&B->cmap);CHKERRQ(ierr); 2178 2179 /* build local table of row and column ownerships */ 2180 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2181 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2182 b->cowners = b->rowners + b->size + 2; 2183 b->rowners_bs = b->cowners + b->size + 2; 2184 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2185 b->rowners[0] = 0; 2186 for (i=2; i<=b->size; i++) { 2187 b->rowners[i] += b->rowners[i-1]; 2188 } 2189 for (i=0; i<=b->size; i++) { 2190 b->rowners_bs[i] = b->rowners[i]*bs; 2191 } 2192 b->rstart = b->rowners[b->rank]; 2193 b->rend = b->rowners[b->rank+1]; 2194 b->rstart_bs = b->rstart * bs; 2195 b->rend_bs = b->rend * bs; 2196 2197 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2198 b->cowners[0] = 0; 2199 for (i=2; i<=b->size; i++) { 2200 b->cowners[i] += b->cowners[i-1]; 2201 } 2202 b->cstart = b->cowners[b->rank]; 2203 b->cend = b->cowners[b->rank+1]; 2204 b->cstart_bs = b->cstart * bs; 2205 b->cend_bs = b->cend * bs; 2206 2207 2208 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2209 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2210 PLogObjectParent(B,b->A); 2211 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2212 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2213 PLogObjectParent(B,b->B); 2214 2215 /* build cache for off array entries formed */ 2216 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2217 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2218 b->donotstash = PETSC_FALSE; 2219 b->colmap = PETSC_NULL; 2220 b->garray = PETSC_NULL; 2221 b->roworiented 
= PETSC_TRUE; 2222 2223 #if defined(PEYSC_USE_MAT_SINGLE) 2224 /* stuff for MatSetValues_XXX in single precision */ 2225 b->lensetvalues = 0; 2226 b->setvaluescopy = PETSC_NULL; 2227 #endif 2228 2229 /* stuff used in block assembly */ 2230 b->barray = 0; 2231 2232 /* stuff used for matrix vector multiply */ 2233 b->lvec = 0; 2234 b->Mvctx = 0; 2235 2236 /* stuff for MatGetRow() */ 2237 b->rowindices = 0; 2238 b->rowvalues = 0; 2239 b->getrowactive = PETSC_FALSE; 2240 2241 /* hash table stuff */ 2242 b->ht = 0; 2243 b->hd = 0; 2244 b->ht_size = 0; 2245 b->ht_flag = PETSC_FALSE; 2246 b->ht_fact = 0; 2247 b->ht_total_ct = 0; 2248 b->ht_insert_ct = 0; 2249 2250 *A = B; 2251 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2252 if (flg) { 2253 double fact = 1.39; 2254 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2255 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2256 if (fact <= 1.0) fact = 1.39; 2257 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2258 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2259 } 2260 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2261 "MatStoreValues_MPIBAIJ", 2262 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2263 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2264 "MatRetrieveValues_MPIBAIJ", 2265 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2266 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2267 "MatGetDiagonalBlock_MPIBAIJ", 2268 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNC__ 2273 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 2274 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2275 { 2276 Mat mat; 2277 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2278 int ierr,len=0; 2279 PetscTruth flg; 2280 2281 PetscFunctionBegin; 2282 
*newmat = 0; 2283 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2284 PLogObjectCreate(mat); 2285 mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2286 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2287 mat->factor = matin->factor; 2288 mat->assembled = PETSC_TRUE; 2289 mat->insertmode = NOT_SET_VALUES; 2290 2291 a->m = mat->m = oldmat->m; 2292 a->n = mat->n = oldmat->n; 2293 a->M = mat->M = oldmat->M; 2294 a->N = mat->N = oldmat->N; 2295 2296 a->bs = oldmat->bs; 2297 a->bs2 = oldmat->bs2; 2298 a->mbs = oldmat->mbs; 2299 a->nbs = oldmat->nbs; 2300 a->Mbs = oldmat->Mbs; 2301 a->Nbs = oldmat->Nbs; 2302 2303 a->rstart = oldmat->rstart; 2304 a->rend = oldmat->rend; 2305 a->cstart = oldmat->cstart; 2306 a->cend = oldmat->cend; 2307 a->size = oldmat->size; 2308 a->rank = oldmat->rank; 2309 a->donotstash = oldmat->donotstash; 2310 a->roworiented = oldmat->roworiented; 2311 a->rowindices = 0; 2312 a->rowvalues = 0; 2313 a->getrowactive = PETSC_FALSE; 2314 a->barray = 0; 2315 a->rstart_bs = oldmat->rstart_bs; 2316 a->rend_bs = oldmat->rend_bs; 2317 a->cstart_bs = oldmat->cstart_bs; 2318 a->cend_bs = oldmat->cend_bs; 2319 2320 /* hash table stuff */ 2321 a->ht = 0; 2322 a->hd = 0; 2323 a->ht_size = 0; 2324 a->ht_flag = oldmat->ht_flag; 2325 a->ht_fact = oldmat->ht_fact; 2326 a->ht_total_ct = 0; 2327 a->ht_insert_ct = 0; 2328 2329 2330 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2331 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2332 a->cowners = a->rowners + a->size + 2; 2333 a->rowners_bs = a->cowners + a->size + 2; 2334 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2335 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2336 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2337 if (oldmat->colmap) 
{ 2338 #if defined (PETSC_USE_CTABLE) 2339 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2340 #else 2341 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2342 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2343 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2344 #endif 2345 } else a->colmap = 0; 2346 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2347 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2348 PLogObjectMemory(mat,len*sizeof(int)); 2349 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2350 } else a->garray = 0; 2351 2352 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2353 PLogObjectParent(mat,a->lvec); 2354 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2355 2356 PLogObjectParent(mat,a->Mvctx); 2357 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2358 PLogObjectParent(mat,a->A); 2359 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2360 PLogObjectParent(mat,a->B); 2361 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2362 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2363 if (flg) { 2364 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2365 } 2366 *newmat = mat; 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #include "petscsys.h" 2371 2372 #undef __FUNC__ 2373 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 2374 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2375 { 2376 Mat A; 2377 int i,nz,ierr,j,rstart,rend,fd; 2378 Scalar *vals,*buf; 2379 MPI_Comm comm = ((PetscObject)viewer)->comm; 2380 MPI_Status status; 2381 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2382 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2383 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2384 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2385 int 
dcount,kmax,k,nzcount,tmp; 2386 2387 PetscFunctionBegin; 2388 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2389 2390 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2391 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2392 if (!rank) { 2393 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2394 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2395 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2396 if (header[3] < 0) { 2397 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2398 } 2399 } 2400 2401 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2402 M = header[1]; N = header[2]; 2403 2404 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2405 2406 /* 2407 This code adds extra rows to make sure the number of rows is 2408 divisible by the blocksize 2409 */ 2410 Mbs = M/bs; 2411 extra_rows = bs - M + bs*(Mbs); 2412 if (extra_rows == bs) extra_rows = 0; 2413 else Mbs++; 2414 if (extra_rows &&!rank) { 2415 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2416 } 2417 2418 /* determine ownership of all rows */ 2419 mbs = Mbs/size + ((Mbs % size) > rank); 2420 m = mbs * bs; 2421 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2422 browners = rowners + size + 1; 2423 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2424 rowners[0] = 0; 2425 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2426 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2427 rstart = rowners[rank]; 2428 rend = rowners[rank+1]; 2429 2430 /* distribute row lengths to all processors */ 2431 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2432 if (!rank) { 2433 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2434 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2435 for 
(i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2436 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2437 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2438 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2439 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2440 } else { 2441 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2442 } 2443 2444 if (!rank) { 2445 /* calculate the number of nonzeros on each processor */ 2446 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2447 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2448 for (i=0; i<size; i++) { 2449 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2450 procsnz[i] += rowlengths[j]; 2451 } 2452 } 2453 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2454 2455 /* determine max buffer needed and allocate it */ 2456 maxnz = 0; 2457 for (i=0; i<size; i++) { 2458 maxnz = PetscMax(maxnz,procsnz[i]); 2459 } 2460 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2461 2462 /* read in my part of the matrix column indices */ 2463 nz = procsnz[0]; 2464 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2465 mycols = ibuf; 2466 if (size == 1) nz -= extra_rows; 2467 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2468 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2469 2470 /* read in every ones (except the last) and ship off */ 2471 for (i=1; i<size-1; i++) { 2472 nz = procsnz[i]; 2473 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2474 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2475 } 2476 /* read in the stuff for the last proc */ 2477 if (size != 1) { 2478 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2479 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2480 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2481 ierr = 
MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2482 } 2483 ierr = PetscFree(cols);CHKERRQ(ierr); 2484 } else { 2485 /* determine buffer space needed for message */ 2486 nz = 0; 2487 for (i=0; i<m; i++) { 2488 nz += locrowlens[i]; 2489 } 2490 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2491 mycols = ibuf; 2492 /* receive message of column indices*/ 2493 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2494 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2495 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2496 } 2497 2498 /* loop over local rows, determining number of off diagonal entries */ 2499 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2500 odlens = dlens + (rend-rstart); 2501 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2502 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2503 masked1 = mask + Mbs; 2504 masked2 = masked1 + Mbs; 2505 rowcount = 0; nzcount = 0; 2506 for (i=0; i<mbs; i++) { 2507 dcount = 0; 2508 odcount = 0; 2509 for (j=0; j<bs; j++) { 2510 kmax = locrowlens[rowcount]; 2511 for (k=0; k<kmax; k++) { 2512 tmp = mycols[nzcount++]/bs; 2513 if (!mask[tmp]) { 2514 mask[tmp] = 1; 2515 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2516 else masked1[dcount++] = tmp; 2517 } 2518 } 2519 rowcount++; 2520 } 2521 2522 dlens[i] = dcount; 2523 odlens[i] = odcount; 2524 2525 /* zero out the mask elements we set */ 2526 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2527 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2528 } 2529 2530 /* create our matrix */ 2531 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2532 A = *newmat; 2533 MatSetOption(A,MAT_COLUMNS_SORTED); 2534 2535 if (!rank) { 2536 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2537 /* read in my part of the matrix numerical values */ 2538 nz = procsnz[0]; 
2539 vals = buf; 2540 mycols = ibuf; 2541 if (size == 1) nz -= extra_rows; 2542 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2543 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2544 2545 /* insert into matrix */ 2546 jj = rstart*bs; 2547 for (i=0; i<m; i++) { 2548 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2549 mycols += locrowlens[i]; 2550 vals += locrowlens[i]; 2551 jj++; 2552 } 2553 /* read in other processors (except the last one) and ship out */ 2554 for (i=1; i<size-1; i++) { 2555 nz = procsnz[i]; 2556 vals = buf; 2557 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2558 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2559 } 2560 /* the last proc */ 2561 if (size != 1){ 2562 nz = procsnz[i] - extra_rows; 2563 vals = buf; 2564 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2565 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2566 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2567 } 2568 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2569 } else { 2570 /* receive numeric values */ 2571 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2572 2573 /* receive message of values*/ 2574 vals = buf; 2575 mycols = ibuf; 2576 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2577 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2578 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2579 2580 /* insert into matrix */ 2581 jj = rstart*bs; 2582 for (i=0; i<m; i++) { 2583 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2584 mycols += locrowlens[i]; 2585 vals += locrowlens[i]; 2586 jj++; 2587 } 2588 } 2589 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2590 ierr = PetscFree(buf);CHKERRQ(ierr); 2591 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2592 ierr = PetscFree(rowners);CHKERRQ(ierr); 2593 ierr = 
PetscFree(dlens);CHKERRQ(ierr); 2594 ierr = PetscFree(mask);CHKERRQ(ierr); 2595 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2596 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 #undef __FUNC__ 2601 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2602 /*@ 2603 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2604 2605 Input Parameters: 2606 . mat - the matrix 2607 . fact - factor 2608 2609 Collective on Mat 2610 2611 Level: advanced 2612 2613 Notes: 2614 This can also be set by the command line option: -mat_use_hash_table fact 2615 2616 .keywords: matrix, hashtable, factor, HT 2617 2618 .seealso: MatSetOption() 2619 @*/ 2620 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2621 { 2622 Mat_MPIBAIJ *baij; 2623 2624 PetscFunctionBegin; 2625 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2626 if (mat->type != MATMPIBAIJ) { 2627 SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only."); 2628 } 2629 baij = (Mat_MPIBAIJ*)mat->data; 2630 baij->ht_fact = fact; 2631 PetscFunctionReturn(0); 2632 } 2633