1 /*$Id: mpibaij.c,v 1.200 2000/06/30 18:55:36 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN int DisAssemble_MPIBAIJ(Mat); 8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *); 11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 15 EXTERN int MatPrintHelp_SeqBAIJ(Mat); 16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*); 17 18 /* UGLY, ugly, ugly 19 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22 converts the entries into single precision and then calls ..._MatScalar() to put them 23 into the single precision data structures. 
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
/* When MatScalar == Scalar the ..._MatScalar() variants are just aliases */
#define MatSetValuesBlocked_SeqBAIJ_MatScalar    MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar           MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar    MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar        MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
/*
   Saves a copy of the numerical values of both the diagonal (A) and
   off-diagonal (B) local blocks, by delegating to MatStoreValues() on each;
   the values can later be restored with MatRetrieveValues_MPIBAIJ().
*/
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
/*
   Restores the numerical values of both local blocks previously saved
   by MatStoreValues_MPIBAIJ().
*/
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
74 */ 75 #undef __FUNC__ 76 #define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private" 77 static int CreateColmap_MPIBAIJ_Private(Mat mat) 78 { 79 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 80 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 81 int nbs = B->nbs,i,bs=B->bs,ierr; 82 83 PetscFunctionBegin; 84 #if defined (PETSC_USE_CTABLE) 85 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 86 for (i=0; i<nbs; i++){ 87 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 88 } 89 #else 90 baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 91 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 92 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 93 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 94 #endif 95 PetscFunctionReturn(0); 96 } 97 98 #define CHUNKSIZE 10 99 100 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 101 { \ 102 \ 103 brow = row/bs; \ 104 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 105 rmax = aimax[brow]; nrow = ailen[brow]; \ 106 bcol = col/bs; \ 107 ridx = row % bs; cidx = col % bs; \ 108 low = 0; high = nrow; \ 109 while (high-low > 3) { \ 110 t = (low+high)/2; \ 111 if (rp[t] > bcol) high = t; \ 112 else low = t; \ 113 } \ 114 for (_i=low; _i<high; _i++) { \ 115 if (rp[_i] > bcol) break; \ 116 if (rp[_i] == bcol) { \ 117 bap = ap + bs2*_i + bs*cidx + ridx; \ 118 if (addv == ADD_VALUES) *bap += value; \ 119 else *bap = value; \ 120 goto a_noinsert; \ 121 } \ 122 } \ 123 if (a->nonew == 1) goto a_noinsert; \ 124 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 125 if (nrow >= rmax) { \ 126 /* there is no extra room in row, therefore enlarge */ \ 127 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 128 MatScalar *new_a; \ 129 \ 130 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 131 \ 132 /* malloc 
new storage space */ \ 133 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 134 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 135 new_j = (int*)(new_a + bs2*new_nz); \ 136 new_i = new_j + new_nz; \ 137 \ 138 /* copy over old data into new slots */ \ 139 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 140 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 141 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 142 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 143 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 144 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 145 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 146 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 147 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 148 /* free up old matrix storage */ \ 149 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 150 if (!a->singlemalloc) { \ 151 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 152 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 153 } \ 154 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 155 a->singlemalloc = PETSC_TRUE; \ 156 \ 157 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 158 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 159 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 160 a->maxnz += bs2*CHUNKSIZE; \ 161 a->reallocs++; \ 162 a->nz++; \ 163 } \ 164 N = nrow++ - 1; \ 165 /* shift up all the later entries in this row */ \ 166 for (ii=N; ii>=_i; ii--) { \ 167 rp[ii+1] = rp[ii]; \ 168 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 169 } \ 170 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 171 rp[_i] = bcol; \ 172 ap[bs2*_i + bs*cidx + ridx] = value; \ 173 a_noinsert:; \ 174 ailen[brow] = nrow; \ 175 } 176 177 #define 
MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 178 { \ 179 brow = row/bs; \ 180 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 181 rmax = bimax[brow]; nrow = bilen[brow]; \ 182 bcol = col/bs; \ 183 ridx = row % bs; cidx = col % bs; \ 184 low = 0; high = nrow; \ 185 while (high-low > 3) { \ 186 t = (low+high)/2; \ 187 if (rp[t] > bcol) high = t; \ 188 else low = t; \ 189 } \ 190 for (_i=low; _i<high; _i++) { \ 191 if (rp[_i] > bcol) break; \ 192 if (rp[_i] == bcol) { \ 193 bap = ap + bs2*_i + bs*cidx + ridx; \ 194 if (addv == ADD_VALUES) *bap += value; \ 195 else *bap = value; \ 196 goto b_noinsert; \ 197 } \ 198 } \ 199 if (b->nonew == 1) goto b_noinsert; \ 200 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 201 if (nrow >= rmax) { \ 202 /* there is no extra room in row, therefore enlarge */ \ 203 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 204 MatScalar *new_a; \ 205 \ 206 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 207 \ 208 /* malloc new storage space */ \ 209 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 210 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 211 new_j = (int*)(new_a + bs2*new_nz); \ 212 new_i = new_j + new_nz; \ 213 \ 214 /* copy over old data into new slots */ \ 215 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 216 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 217 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 218 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 219 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 220 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 221 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 222 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 223 
ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 224 /* free up old matrix storage */ \ 225 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 226 if (!b->singlemalloc) { \ 227 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 228 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 229 } \ 230 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 231 b->singlemalloc = PETSC_TRUE; \ 232 \ 233 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 234 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 235 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 236 b->maxnz += bs2*CHUNKSIZE; \ 237 b->reallocs++; \ 238 b->nz++; \ 239 } \ 240 N = nrow++ - 1; \ 241 /* shift up all the later entries in this row */ \ 242 for (ii=N; ii>=_i; ii--) { \ 243 rp[ii+1] = rp[ii]; \ 244 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 245 } \ 246 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 247 rp[_i] = bcol; \ 248 ap[bs2*_i + bs*cidx + ridx] = value; \ 249 b_noinsert:; \ 250 bilen[brow] = nrow; \ 251 } 252 253 #if defined(PETSC_USE_MAT_SINGLE) 254 #undef __FUNC__ 255 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 256 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 257 { 258 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 259 int ierr,i,N = m*n; 260 MatScalar *vsingle; 261 262 PetscFunctionBegin; 263 if (N > b->setvalueslen) { 264 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 265 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 266 b->setvalueslen = N; 267 } 268 vsingle = b->setvaluescopy; 269 270 for (i=0; i<N; i++) { 271 vsingle[i] = v[i]; 272 } 273 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNC__ 278 #define __FUNC__ /*<a 
name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ" 279 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 280 { 281 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 282 int ierr,i,N = m*n*b->bs2; 283 MatScalar *vsingle; 284 285 PetscFunctionBegin; 286 if (N > b->setvalueslen) { 287 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 288 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 289 b->setvalueslen = N; 290 } 291 vsingle = b->setvaluescopy; 292 for (i=0; i<N; i++) { 293 vsingle[i] = v[i]; 294 } 295 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 296 PetscFunctionReturn(0); 297 } 298 299 #undef __FUNC__ 300 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT" 301 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 302 { 303 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 304 int ierr,i,N = m*n; 305 MatScalar *vsingle; 306 307 PetscFunctionBegin; 308 if (N > b->setvalueslen) { 309 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 310 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 311 b->setvalueslen = N; 312 } 313 vsingle = b->setvaluescopy; 314 for (i=0; i<N; i++) { 315 vsingle[i] = v[i]; 316 } 317 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNC__ 322 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT" 323 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 324 { 325 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 326 int ierr,i,N = m*n*b->bs2; 327 MatScalar *vsingle; 328 329 PetscFunctionBegin; 330 if (N > b->setvalueslen) { 331 if (b->setvaluescopy) {ierr = 
PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
/*
   Inserts/adds scalar entries.  Locally owned entries go directly into the
   diagonal (A) or off-diagonal (B) block via the _Private macros; entries
   owned by other processes are stashed for communication at assembly time.
*/
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row index means "skip" */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* off-diagonal block: translate global column to local via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not in current off-diagonal pattern: fall back to
                 unassembled form so the new nonzero can be accommodated */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
/*
   Inserts/adds bs x bs blocks of values; indices im[]/in[] are block indices.
   Locally owned blocks are forwarded to the Seq kernels; remote blocks are
   stashed in the block stash (mat->bstash).
*/
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2 staging buffer used to reorder one block */
  if(!barray) {
    baij->barray = barray = 
(MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
  }

  /* stepval = distance between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather one bs x bs block contiguously into barray */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal part */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* off-diagonal part: map global block column to local */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: revert to unassembled form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* remote block row: stash it */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hash on the golden ratio; tmp holds the fractional part */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   Hash-table variant of MatSetValues: the matrix pattern is fixed and was
   recorded in baij->ht by MatCreateHashTable_MPIBAIJ_Private(); each entry
   is located by hashing its global block (row,col) and probing linearly.
*/
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash 
Table */
        key = (row/bs)*Nbs+(col/bs)+1;   /* +1 so key 0 marks an empty slot */
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe forward from h1, wrapping to the start once */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* remote row: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   Blocked hash-table insertion: locates each bs x bs block through the
   hash table (keyed on global block row/col) and copies/accumulates the
   block values directly into the matrix storage.
*/
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* storage is column-major within a block: stride by bs */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += 
*value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* remote block row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
/*
   Retrieves entries by global (point, not block) indices.  Only locally
   owned rows are supported; off-diagonal columns are located through the
   colmap and entries absent from the nonzero pattern are returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column absent from the off-diagonal pattern -> value is 0 */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
/*
   Computes a matrix norm.  Only NORM_FROBENIUS is supported in parallel:
   each process sums the squares of its local (A and B) entries and the
   result is combined with MPI_Allreduce.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: delegate fully to the sequential implementation */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
814 */ 815 #undef __FUNC__ 816 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private" 817 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 818 { 819 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 820 Mat A = baij->A,B=baij->B; 821 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 822 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 823 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 824 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 825 int *HT,key; 826 MatScalar **HD; 827 PetscReal tmp; 828 #if defined(PETSC_USE_BOPT_g) 829 int ct=0,max=0; 830 #endif 831 832 PetscFunctionBegin; 833 baij->ht_size=(int)(factor*nz); 834 size = baij->ht_size; 835 836 if (baij->ht) { 837 PetscFunctionReturn(0); 838 } 839 840 /* Allocate Memory for Hash Table */ 841 baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 842 baij->ht = (int*)(baij->hd + size); 843 HD = baij->hd; 844 HT = baij->ht; 845 846 847 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 848 849 850 /* Loop Over A */ 851 for (i=0; i<a->mbs; i++) { 852 for (j=ai[i]; j<ai[i+1]; j++) { 853 row = i+rstart; 854 col = aj[j]+cstart; 855 856 key = row*Nbs + col + 1; 857 h1 = HASH(size,key,tmp); 858 for (k=0; k<size; k++){ 859 if (HT[(h1+k)%size] == 0.0) { 860 HT[(h1+k)%size] = key; 861 HD[(h1+k)%size] = a->a + j*bs2; 862 break; 863 #if defined(PETSC_USE_BOPT_g) 864 } else { 865 ct++; 866 #endif 867 } 868 } 869 #if defined(PETSC_USE_BOPT_g) 870 if (k> max) max = k; 871 #endif 872 } 873 } 874 /* Loop Over B */ 875 for (i=0; i<b->mbs; i++) { 876 for (j=bi[i]; j<bi[i+1]; j++) { 877 row = i+rstart; 878 col = garray[bj[j]]; 879 key = row*Nbs + col + 1; 880 h1 = HASH(size,key,tmp); 881 for (k=0; k<size; k++){ 882 if (HT[(h1+k)%size] == 0.0) { 883 HT[(h1+k)%size] = key; 884 HD[(h1+k)%size] = b->a + j*bs2; 885 break; 886 #if defined(PETSC_USE_BOPT_g) 887 } else { 888 ct++; 889 #endif 
890 } 891 } 892 #if defined(PETSC_USE_BOPT_g) 893 if (k> max) max = k; 894 #endif 895 } 896 } 897 898 /* Print Summary */ 899 #if defined(PETSC_USE_BOPT_g) 900 for (i=0,j=0; i<size; i++) { 901 if (HT[i]) {j++;} 902 } 903 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 904 #endif 905 PetscFunctionReturn(0); 906 } 907 908 #undef __FUNC__ 909 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ" 910 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 911 { 912 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 913 int ierr,nstash,reallocs; 914 InsertMode addv; 915 916 PetscFunctionBegin; 917 if (baij->donotstash) { 918 PetscFunctionReturn(0); 919 } 920 921 /* make sure all processors are either in INSERTMODE or ADDMODE */ 922 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 923 if (addv == (ADD_VALUES|INSERT_VALUES)) { 924 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 925 } 926 mat->insertmode = addv; /* in case this processor had no cache */ 927 928 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 929 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 930 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 931 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 932 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 933 PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 934 PetscFunctionReturn(0); 935 } 936 937 #undef __FUNC__ 938 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ" 939 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 940 { 941 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 942 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 943 int 
              i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* Drain the point stash: each received message is a list of (row,col,val)
       triples; consecutive entries sharing a row are batched into one call */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* blocked values: each entry is a bs2-sized block, hence val+i*bs2 */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    /* NOTE(review): ht_insert_ct could be zero here, giving a division by
       zero in this debug-only statistic -- confirm the hash-table path is
       always exercised before final assembly when baij->ht is set */
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* first final assembly with the hash-table option enabled: build the table
     and switch the SetValues kernels to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow() work space; it is rebuilt on demand */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Viewer back end; for true parallel
   viewing the whole matrix is gathered onto rank 0 (non-scalable).
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth  isascii,isdraw;
  Viewer      sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr =
PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-process statistics only; no matrix entries are printed */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* rank 0 owns the whole gathered matrix; everyone else owns 0 rows/cols */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      /* rvals = the bs consecutive global point-row indices of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* one column of the bs x bs block per call (values stored by column) */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps the compressed local block column to its global index */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
/*
   MatView_MPIBAIJ - Dispatches to the combined back end for the viewer
   types this format supports; errors out for anything else.
*/
int MatView_MPIBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees every structure owned by the parallel matrix:
   mappings, maps, stashes, the two sequential submatrices, and all
   scatter/work arrays.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1193 #else 1194 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1195 #endif 1196 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1197 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1198 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1199 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1200 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1201 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1202 #if defined(PETSC_USE_MAT_SINGLE) 1203 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1204 #endif 1205 ierr = PetscFree(baij);CHKERRQ(ierr); 1206 PLogObjectDestroy(mat); 1207 PetscHeaderDestroy(mat); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNC__ 1212 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ" 1213 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1214 { 1215 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1216 int ierr,nt; 1217 1218 PetscFunctionBegin; 1219 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1220 if (nt != a->n) { 1221 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1222 } 1223 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1224 if (nt != a->m) { 1225 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1226 } 1227 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1228 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1229 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1230 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1231 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNC__ 1236 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ" 1237 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1238 
{ 1239 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1240 int ierr; 1241 1242 PetscFunctionBegin; 1243 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1244 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1246 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNC__ 1251 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ" 1252 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1253 { 1254 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1255 int ierr; 1256 1257 PetscFunctionBegin; 1258 /* do nondiagonal part */ 1259 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1260 /* send it on its way */ 1261 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1262 /* do local part */ 1263 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1264 /* receive remote parts: note this assumes the values are not actually */ 1265 /* inserted in yy until the next line, which is true for my implementation*/ 1266 /* but is not perhaps always true. 
*/ 1267 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNC__ 1272 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ" 1273 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1274 { 1275 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1276 int ierr; 1277 1278 PetscFunctionBegin; 1279 /* do nondiagonal part */ 1280 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1281 /* send it on its way */ 1282 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1283 /* do local part */ 1284 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1285 /* receive remote parts: note this assumes the values are not actually */ 1286 /* inserted in yy until the next line, which is true for my implementation*/ 1287 /* but is not perhaps always true. */ 1288 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 /* 1293 This only works correctly for square matrices where the subblock A->A is the 1294 diagonal block 1295 */ 1296 #undef __FUNC__ 1297 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ" 1298 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1299 { 1300 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1301 int ierr; 1302 1303 PetscFunctionBegin; 1304 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1305 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNC__ 1310 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ" 1311 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1312 { 1313 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1314 int ierr; 1315 1316 PetscFunctionBegin; 1317 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1318 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNC__ 1323 #define __FUNC__ /*<a 
name=""></a>*/"MatGetSize_MPIBAIJ"
/* MatGetSize_MPIBAIJ - Returns the global point dimensions M x N. */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
/* MatGetLocalSize_MPIBAIJ - Returns this process's local point dimensions. */
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
/*
   MatGetOwnershipRange_MPIBAIJ - Returns [rstart,rend) in POINT rows:
   internal block-row bounds scaled by the block size.
*/
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart*mat->bs;
  if (n) *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned point row with global column
   indices, merging the diagonal (A) and off-diagonal (B) parts in increasing
   column order.  Work space (rowvalues/rowindices) sized for the longest row
   is allocated lazily and reused until assembly frees it.  Only one row may
   be "active" at a time (see getrowactive / MatRestoreRow).
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  Scalar      *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int         nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int         *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* longest combined block row of A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values followed by the index array */
    mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        /* B entries with global column < cstart come first, then all of A,
           then the remaining B entries; imark is the split point within B */
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            /* translate block column + offset within block to global point column */
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be computed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr =
(*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1434 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNC__ 1439 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ" 1440 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1441 { 1442 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1443 1444 PetscFunctionBegin; 1445 if (baij->getrowactive == PETSC_FALSE) { 1446 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1447 } 1448 baij->getrowactive = PETSC_FALSE; 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNC__ 1453 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ" 1454 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1455 { 1456 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1457 1458 PetscFunctionBegin; 1459 *bs = baij->bs; 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNC__ 1464 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ" 1465 int MatZeroEntries_MPIBAIJ(Mat A) 1466 { 1467 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1468 int ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1472 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNC__ 1477 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ" 1478 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1479 { 1480 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1481 Mat A = a->A,B = a->B; 1482 int ierr; 1483 PetscReal isend[5],irecv[5]; 1484 1485 PetscFunctionBegin; 1486 info->block_size = (double)a->bs; 1487 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1488 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1489 isend[3] = info->memory; isend[4] = info->mallocs; 1490 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1491 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1492 
isend[3] += info->memory; isend[4] += info->mallocs; 1493 if (flag == MAT_LOCAL) { 1494 info->nz_used = isend[0]; 1495 info->nz_allocated = isend[1]; 1496 info->nz_unneeded = isend[2]; 1497 info->memory = isend[3]; 1498 info->mallocs = isend[4]; 1499 } else if (flag == MAT_GLOBAL_MAX) { 1500 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1501 info->nz_used = irecv[0]; 1502 info->nz_allocated = irecv[1]; 1503 info->nz_unneeded = irecv[2]; 1504 info->memory = irecv[3]; 1505 info->mallocs = irecv[4]; 1506 } else if (flag == MAT_GLOBAL_SUM) { 1507 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1508 info->nz_used = irecv[0]; 1509 info->nz_allocated = irecv[1]; 1510 info->nz_unneeded = irecv[2]; 1511 info->memory = irecv[3]; 1512 info->mallocs = irecv[4]; 1513 } else { 1514 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1515 } 1516 info->rows_global = (double)a->M; 1517 info->columns_global = (double)a->N; 1518 info->rows_local = (double)a->m; 1519 info->columns_local = (double)a->N; 1520 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1521 info->fill_ratio_needed = 0; 1522 info->factor_mallocs = 0; 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNC__ 1527 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ" 1528 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1529 { 1530 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1531 int ierr; 1532 1533 PetscFunctionBegin; 1534 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1535 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1536 op == MAT_COLUMNS_UNSORTED || 1537 op == MAT_COLUMNS_SORTED || 1538 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1539 op == MAT_KEEP_ZEROED_ROWS || 1540 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1541 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1542 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1543 } else if (op == MAT_ROW_ORIENTED) { 1544 a->roworiented = PETSC_TRUE; 1545 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1546 ierr = 
MatSetOption(a->B,op);CHKERRQ(ierr); 1547 } else if (op == MAT_ROWS_SORTED || 1548 op == MAT_ROWS_UNSORTED || 1549 op == MAT_SYMMETRIC || 1550 op == MAT_STRUCTURALLY_SYMMETRIC || 1551 op == MAT_YES_NEW_DIAGONALS || 1552 op == MAT_USE_HASH_TABLE) { 1553 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1554 } else if (op == MAT_COLUMN_ORIENTED) { 1555 a->roworiented = PETSC_FALSE; 1556 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1557 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1558 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1559 a->donotstash = PETSC_TRUE; 1560 } else if (op == MAT_NO_NEW_DIAGONALS) { 1561 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1562 } else if (op == MAT_USE_HASH_TABLE) { 1563 a->ht_flag = PETSC_TRUE; 1564 } else { 1565 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNC__ 1571 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ(" 1572 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1573 { 1574 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1575 Mat_SeqBAIJ *Aloc; 1576 Mat B; 1577 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1578 int bs=baij->bs,mbs=baij->mbs; 1579 MatScalar *a; 1580 1581 PetscFunctionBegin; 1582 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1583 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1584 1585 /* copy over the A part */ 1586 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1587 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1588 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1589 1590 for (i=0; i<mbs; i++) { 1591 rvals[0] = bs*(baij->rstart + i); 1592 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1593 for (j=ai[i]; j<ai[i+1]; j++) { 1594 col = (baij->cstart+aj[j])*bs; 1595 for (k=0; k<bs; k++) { 1596 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1597 col++; a += bs; 1598 } 1599 } 1600 } 1601 /* copy 
over the B part */ 1602 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1603 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1604 for (i=0; i<mbs; i++) { 1605 rvals[0] = bs*(baij->rstart + i); 1606 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1607 for (j=ai[i]; j<ai[i+1]; j++) { 1608 col = baij->garray[aj[j]]*bs; 1609 for (k=0; k<bs; k++) { 1610 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1611 col++; a += bs; 1612 } 1613 } 1614 } 1615 ierr = PetscFree(rvals);CHKERRQ(ierr); 1616 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1617 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1618 1619 if (matout) { 1620 *matout = B; 1621 } else { 1622 PetscOps *Abops; 1623 MatOps Aops; 1624 1625 /* This isn't really an in-place transpose .... but free data structures from baij */ 1626 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1627 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1628 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1629 #if defined (PETSC_USE_CTABLE) 1630 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1631 #else 1632 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1633 #endif 1634 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1635 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1636 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1637 ierr = PetscFree(baij);CHKERRQ(ierr); 1638 1639 /* 1640 This is horrible, horrible code. We need to keep the 1641 A pointers for the bops and ops but copy everything 1642 else from C. 
1643 */ 1644 Abops = A->bops; 1645 Aops = A->ops; 1646 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1647 A->bops = Abops; 1648 A->ops = Aops; 1649 1650 PetscHeaderDestroy(B); 1651 } 1652 PetscFunctionReturn(0); 1653 } 1654 1655 #undef __FUNC__ 1656 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ" 1657 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1658 { 1659 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1660 Mat a = baij->A,b = baij->B; 1661 int ierr,s1,s2,s3; 1662 1663 PetscFunctionBegin; 1664 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1665 if (rr) { 1666 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1667 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1668 /* Overlap communication with computation. */ 1669 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1670 } 1671 if (ll) { 1672 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1673 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1674 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1675 } 1676 /* scale the diagonal block */ 1677 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1678 1679 if (rr) { 1680 /* Do a scatter end and then right scale the off-diagonal block */ 1681 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1682 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1683 } 1684 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNC__ 1689 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ" 1690 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1691 { 1692 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1693 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1694 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1695 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1696 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1697 int 
              *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    /* linear search over the block-row ownership ranges (scaled to points) */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues    = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (the bucketing loop above advanced it) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      /* convert to a local point-row index */
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: fall back on MatSetValues per row */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
/*
   MatPrintHelp_MPIBAIJ - Prints the runtime options for this format
   (once per run; rank 0 also prints the sequential BAIJ options).
*/
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
1847 int ierr; 1848 1849 PetscFunctionBegin; 1850 if (!a->rank) { 1851 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1852 } 1853 if (called) {PetscFunctionReturn(0);} else called = 1; 1854 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1855 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNC__ 1860 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ" 1861 int MatSetUnfactored_MPIBAIJ(Mat A) 1862 { 1863 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1864 int ierr; 1865 1866 PetscFunctionBegin; 1867 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1868 PetscFunctionReturn(0); 1869 } 1870 1871 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1872 1873 #undef __FUNC__ 1874 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ" 1875 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1876 { 1877 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1878 Mat a,b,c,d; 1879 PetscTruth flg; 1880 int ierr; 1881 1882 PetscFunctionBegin; 1883 if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1884 a = matA->A; b = matA->B; 1885 c = matB->A; d = matB->B; 1886 1887 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1888 if (flg == PETSC_TRUE) { 1889 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1890 } 1891 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 /* -------------------------------------------------------------------*/ 1896 static struct _MatOps MatOps_Values = { 1897 MatSetValues_MPIBAIJ, 1898 MatGetRow_MPIBAIJ, 1899 MatRestoreRow_MPIBAIJ, 1900 MatMult_MPIBAIJ, 1901 MatMultAdd_MPIBAIJ, 1902 MatMultTranspose_MPIBAIJ, 1903 MatMultTransposeAdd_MPIBAIJ, 1904 0, 1905 0, 1906 0, 1907 0, 1908 0, 1909 0, 1910 0, 1911 MatTranspose_MPIBAIJ, 1912 MatGetInfo_MPIBAIJ, 
1913 MatEqual_MPIBAIJ, 1914 MatGetDiagonal_MPIBAIJ, 1915 MatDiagonalScale_MPIBAIJ, 1916 MatNorm_MPIBAIJ, 1917 MatAssemblyBegin_MPIBAIJ, 1918 MatAssemblyEnd_MPIBAIJ, 1919 0, 1920 MatSetOption_MPIBAIJ, 1921 MatZeroEntries_MPIBAIJ, 1922 MatZeroRows_MPIBAIJ, 1923 0, 1924 0, 1925 0, 1926 0, 1927 MatGetSize_MPIBAIJ, 1928 MatGetLocalSize_MPIBAIJ, 1929 MatGetOwnershipRange_MPIBAIJ, 1930 0, 1931 0, 1932 0, 1933 0, 1934 MatDuplicate_MPIBAIJ, 1935 0, 1936 0, 1937 0, 1938 0, 1939 0, 1940 MatGetSubMatrices_MPIBAIJ, 1941 MatIncreaseOverlap_MPIBAIJ, 1942 MatGetValues_MPIBAIJ, 1943 0, 1944 MatPrintHelp_MPIBAIJ, 1945 MatScale_MPIBAIJ, 1946 0, 1947 0, 1948 0, 1949 MatGetBlockSize_MPIBAIJ, 1950 0, 1951 0, 1952 0, 1953 0, 1954 0, 1955 0, 1956 MatSetUnfactored_MPIBAIJ, 1957 0, 1958 MatSetValuesBlocked_MPIBAIJ, 1959 0, 1960 MatDestroy_MPIBAIJ, 1961 MatView_MPIBAIJ, 1962 MatGetMaps_Petsc}; 1963 1964 1965 EXTERN_C_BEGIN 1966 #undef __FUNC__ 1967 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ" 1968 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1969 { 1970 PetscFunctionBegin; 1971 *a = ((Mat_MPIBAIJ *)A->data)->A; 1972 *iscopy = PETSC_FALSE; 1973 PetscFunctionReturn(0); 1974 } 1975 EXTERN_C_END 1976 1977 #undef __FUNC__ 1978 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ" 1979 /*@C 1980 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 1981 (block compressed row). For good matrix assembly performance 1982 the user should preallocate the matrix storage by setting the parameters 1983 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1984 performance can be increased by more than a factor of 50. 1985 1986 Collective on MPI_Comm 1987 1988 Input Parameters: 1989 + comm - MPI communicator 1990 . bs - size of blockk 1991 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1992 This value should be the same as the local size used in creating the 1993 y vector for the matrix-vector product y = Ax. 1994 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1995 This value should be the same as the local size used in creating the 1996 x vector for the matrix-vector product y = Ax. 1997 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1998 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1999 . d_nz - number of block nonzeros per block row in diagonal portion of local 2000 submatrix (same for all local rows) 2001 . d_nnz - array containing the number of block nonzeros in the various block rows 2002 of the in diagonal portion of the local (possibly different for each block 2003 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2004 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2005 submatrix (same for all local rows). 2006 - o_nnz - array containing the number of nonzeros in the various block rows of the 2007 off-diagonal portion of the local submatrix (possibly different for 2008 each block row) or PETSC_NULL. 2009 2010 Output Parameter: 2011 . A - the matrix 2012 2013 Options Database Keys: 2014 . -mat_no_unroll - uses code that does not unroll the loops in the 2015 block calculations (much slower) 2016 . -mat_block_size - size of the blocks to use 2017 . -mat_mpi - use the parallel matrix data structures even on one processor 2018 (defaults to using SeqBAIJ format on one processor) 2019 2020 Notes: 2021 The user MUST specify either the local or global matrix dimensions 2022 (possibly both). 2023 2024 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2025 than it must be used on all processors that share the object for that argument. 
2026 2027 Storage Information: 2028 For a square global matrix we define each processor's diagonal portion 2029 to be its local rows and the corresponding columns (a square submatrix); 2030 each processor's off-diagonal portion encompasses the remainder of the 2031 local matrix (a rectangular submatrix). 2032 2033 The user can specify preallocated storage for the diagonal part of 2034 the local submatrix with either d_nz or d_nnz (not both). Set 2035 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2036 memory allocation. Likewise, specify preallocated storage for the 2037 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2038 2039 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2040 the figure below we depict these three local rows and all columns (0-11). 2041 2042 .vb 2043 0 1 2 3 4 5 6 7 8 9 10 11 2044 ------------------- 2045 row 3 | o o o d d d o o o o o o 2046 row 4 | o o o d d d o o o o o o 2047 row 5 | o o o d d d o o o o o o 2048 ------------------- 2049 .ve 2050 2051 Thus, any entries in the d locations are stored in the d (diagonal) 2052 submatrix, and any entries in the o locations are stored in the 2053 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2054 stored simply in the MATSEQBAIJ format for compressed row storage. 2055 2056 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2057 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2058 In general, for PDE problems in which most nonzeros are near the diagonal, 2059 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2060 or you will get TERRIBLE performance; see the users' manual chapter on 2061 matrices. 
2062 2063 Level: intermediate 2064 2065 .keywords: matrix, block, aij, compressed row, sparse, parallel 2066 2067 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2068 @*/ 2069 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2070 { 2071 Mat B; 2072 Mat_MPIBAIJ *b; 2073 int ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size; 2074 PetscTruth flag1,flag2,flg; 2075 2076 PetscFunctionBegin; 2077 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2078 2079 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2080 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz); 2081 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz); 2082 if (d_nnz) { 2083 for (i=0; i<m/bs; i++) { 2084 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2085 } 2086 } 2087 if (o_nnz) { 2088 for (i=0; i<m/bs; i++) { 2089 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2090 } 2091 } 2092 2093 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2094 ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr); 2095 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2096 if (!flag1 && !flag2 && size == 1) { 2097 if (M == PETSC_DECIDE) M = m; 2098 if (N == PETSC_DECIDE) N = n; 2099 ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 2100 PetscFunctionReturn(0); 2101 } 2102 2103 *A = 0; 2104 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView); 2105 PLogObjectCreate(B); 2106 B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b); 2107 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2108 ierr = 
PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2109 B->mapping = 0; 2110 B->factor = 0; 2111 B->assembled = PETSC_FALSE; 2112 2113 B->insertmode = NOT_SET_VALUES; 2114 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2115 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2116 2117 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2118 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2119 } 2120 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2121 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2122 } 2123 if (N == PETSC_DECIDE && n == PETSC_DECIDE) { 2124 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified"); 2125 } 2126 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2127 if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 2128 2129 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 2130 work[0] = m; work[1] = n; 2131 mbs = m/bs; nbs = n/bs; 2132 ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2133 if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 2134 if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 2135 } 2136 if (m == PETSC_DECIDE) { 2137 Mbs = M/bs; 2138 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2139 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2140 m = mbs*bs; 2141 } 2142 if (n == PETSC_DECIDE) { 2143 Nbs = N/bs; 2144 if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize"); 2145 nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 2146 n = nbs*bs; 2147 } 2148 if (mbs*bs != m || nbs*bs != n) { 2149 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize"); 2150 } 2151 2152 b->m = m; B->m = m; 2153 b->n = n; B->n = n; 2154 b->N = N; B->N = N; 2155 b->M = M; B->M = M; 2156 b->bs = bs; 2157 b->bs2 = bs*bs; 2158 b->mbs = mbs; 2159 b->nbs = nbs; 2160 b->Mbs = Mbs; 2161 b->Nbs = Nbs; 2162 2163 /* the 
information in the maps duplicates the information computed below, eventually 2164 we should remove the duplicate information that is not contained in the maps */ 2165 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 2166 ierr = MapCreateMPI(B->comm,n,N,&B->cmap);CHKERRQ(ierr); 2167 2168 /* build local table of row and column ownerships */ 2169 b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2170 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2171 b->cowners = b->rowners + b->size + 2; 2172 b->rowners_bs = b->cowners + b->size + 2; 2173 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2174 b->rowners[0] = 0; 2175 for (i=2; i<=b->size; i++) { 2176 b->rowners[i] += b->rowners[i-1]; 2177 } 2178 for (i=0; i<=b->size; i++) { 2179 b->rowners_bs[i] = b->rowners[i]*bs; 2180 } 2181 b->rstart = b->rowners[b->rank]; 2182 b->rend = b->rowners[b->rank+1]; 2183 b->rstart_bs = b->rstart * bs; 2184 b->rend_bs = b->rend * bs; 2185 2186 ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2187 b->cowners[0] = 0; 2188 for (i=2; i<=b->size; i++) { 2189 b->cowners[i] += b->cowners[i-1]; 2190 } 2191 b->cstart = b->cowners[b->rank]; 2192 b->cend = b->cowners[b->rank+1]; 2193 b->cstart_bs = b->cstart * bs; 2194 b->cend_bs = b->cend * bs; 2195 2196 2197 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2198 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2199 PLogObjectParent(B,b->A); 2200 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2201 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2202 PLogObjectParent(B,b->B); 2203 2204 /* build cache for off array entries formed */ 2205 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2206 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2207 b->donotstash = PETSC_FALSE; 2208 b->colmap = PETSC_NULL; 2209 b->garray = PETSC_NULL; 2210 
b->roworiented = PETSC_TRUE; 2211 2212 #if defined(PEYSC_USE_MAT_SINGLE) 2213 /* stuff for MatSetValues_XXX in single precision */ 2214 b->lensetvalues = 0; 2215 b->setvaluescopy = PETSC_NULL; 2216 #endif 2217 2218 /* stuff used in block assembly */ 2219 b->barray = 0; 2220 2221 /* stuff used for matrix vector multiply */ 2222 b->lvec = 0; 2223 b->Mvctx = 0; 2224 2225 /* stuff for MatGetRow() */ 2226 b->rowindices = 0; 2227 b->rowvalues = 0; 2228 b->getrowactive = PETSC_FALSE; 2229 2230 /* hash table stuff */ 2231 b->ht = 0; 2232 b->hd = 0; 2233 b->ht_size = 0; 2234 b->ht_flag = PETSC_FALSE; 2235 b->ht_fact = 0; 2236 b->ht_total_ct = 0; 2237 b->ht_insert_ct = 0; 2238 2239 *A = B; 2240 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2241 if (flg) { 2242 double fact = 1.39; 2243 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2244 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2245 if (fact <= 1.0) fact = 1.39; 2246 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2247 PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2248 } 2249 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2250 "MatStoreValues_MPIBAIJ", 2251 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2252 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2253 "MatRetrieveValues_MPIBAIJ", 2254 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2255 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2256 "MatGetDiagonalBlock_MPIBAIJ", 2257 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2258 PetscFunctionReturn(0); 2259 } 2260 2261 #undef __FUNC__ 2262 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ" 2263 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2264 { 2265 Mat mat; 2266 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2267 int ierr,len=0; 2268 PetscTruth flg; 2269 2270 
PetscFunctionBegin; 2271 *newmat = 0; 2272 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2273 PLogObjectCreate(mat); 2274 mat->data = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a); 2275 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2276 mat->factor = matin->factor; 2277 mat->assembled = PETSC_TRUE; 2278 mat->insertmode = NOT_SET_VALUES; 2279 2280 a->m = mat->m = oldmat->m; 2281 a->n = mat->n = oldmat->n; 2282 a->M = mat->M = oldmat->M; 2283 a->N = mat->N = oldmat->N; 2284 2285 a->bs = oldmat->bs; 2286 a->bs2 = oldmat->bs2; 2287 a->mbs = oldmat->mbs; 2288 a->nbs = oldmat->nbs; 2289 a->Mbs = oldmat->Mbs; 2290 a->Nbs = oldmat->Nbs; 2291 2292 a->rstart = oldmat->rstart; 2293 a->rend = oldmat->rend; 2294 a->cstart = oldmat->cstart; 2295 a->cend = oldmat->cend; 2296 a->size = oldmat->size; 2297 a->rank = oldmat->rank; 2298 a->donotstash = oldmat->donotstash; 2299 a->roworiented = oldmat->roworiented; 2300 a->rowindices = 0; 2301 a->rowvalues = 0; 2302 a->getrowactive = PETSC_FALSE; 2303 a->barray = 0; 2304 a->rstart_bs = oldmat->rstart_bs; 2305 a->rend_bs = oldmat->rend_bs; 2306 a->cstart_bs = oldmat->cstart_bs; 2307 a->cend_bs = oldmat->cend_bs; 2308 2309 /* hash table stuff */ 2310 a->ht = 0; 2311 a->hd = 0; 2312 a->ht_size = 0; 2313 a->ht_flag = oldmat->ht_flag; 2314 a->ht_fact = oldmat->ht_fact; 2315 a->ht_total_ct = 0; 2316 a->ht_insert_ct = 0; 2317 2318 2319 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2320 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2321 a->cowners = a->rowners + a->size + 2; 2322 a->rowners_bs = a->cowners + a->size + 2; 2323 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2324 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2325 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 
2326 if (oldmat->colmap) { 2327 #if defined (PETSC_USE_CTABLE) 2328 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2329 #else 2330 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2331 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2332 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2333 #endif 2334 } else a->colmap = 0; 2335 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2336 a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2337 PLogObjectMemory(mat,len*sizeof(int)); 2338 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2339 } else a->garray = 0; 2340 2341 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2342 PLogObjectParent(mat,a->lvec); 2343 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2344 2345 PLogObjectParent(mat,a->Mvctx); 2346 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2347 PLogObjectParent(mat,a->A); 2348 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2349 PLogObjectParent(mat,a->B); 2350 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2351 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2352 if (flg) { 2353 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2354 } 2355 *newmat = mat; 2356 PetscFunctionReturn(0); 2357 } 2358 2359 #include "petscsys.h" 2360 2361 #undef __FUNC__ 2362 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ" 2363 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 2364 { 2365 Mat A; 2366 int i,nz,ierr,j,rstart,rend,fd; 2367 Scalar *vals,*buf; 2368 MPI_Comm comm = ((PetscObject)viewer)->comm; 2369 MPI_Status status; 2370 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2371 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2372 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2373 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2374 int 
dcount,kmax,k,nzcount,tmp; 2375 2376 PetscFunctionBegin; 2377 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2378 2379 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2380 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2381 if (!rank) { 2382 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2383 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2384 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2385 if (header[3] < 0) { 2386 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2387 } 2388 } 2389 2390 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2391 M = header[1]; N = header[2]; 2392 2393 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2394 2395 /* 2396 This code adds extra rows to make sure the number of rows is 2397 divisible by the blocksize 2398 */ 2399 Mbs = M/bs; 2400 extra_rows = bs - M + bs*(Mbs); 2401 if (extra_rows == bs) extra_rows = 0; 2402 else Mbs++; 2403 if (extra_rows &&!rank) { 2404 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2405 } 2406 2407 /* determine ownership of all rows */ 2408 mbs = Mbs/size + ((Mbs % size) > rank); 2409 m = mbs * bs; 2410 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2411 browners = rowners + size + 1; 2412 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2413 rowners[0] = 0; 2414 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2415 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2416 rstart = rowners[rank]; 2417 rend = rowners[rank+1]; 2418 2419 /* distribute row lengths to all processors */ 2420 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2421 if (!rank) { 2422 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2423 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 
2424 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2425 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2426 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2427 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2428 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2429 } else { 2430 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2431 } 2432 2433 if (!rank) { 2434 /* calculate the number of nonzeros on each processor */ 2435 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2436 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2437 for (i=0; i<size; i++) { 2438 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2439 procsnz[i] += rowlengths[j]; 2440 } 2441 } 2442 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2443 2444 /* determine max buffer needed and allocate it */ 2445 maxnz = 0; 2446 for (i=0; i<size; i++) { 2447 maxnz = PetscMax(maxnz,procsnz[i]); 2448 } 2449 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2450 2451 /* read in my part of the matrix column indices */ 2452 nz = procsnz[0]; 2453 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2454 mycols = ibuf; 2455 if (size == 1) nz -= extra_rows; 2456 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2457 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2458 2459 /* read in every ones (except the last) and ship off */ 2460 for (i=1; i<size-1; i++) { 2461 nz = procsnz[i]; 2462 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2463 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2464 } 2465 /* read in the stuff for the last proc */ 2466 if (size != 1) { 2467 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2468 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2469 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2470 ierr = 
MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2471 } 2472 ierr = PetscFree(cols);CHKERRQ(ierr); 2473 } else { 2474 /* determine buffer space needed for message */ 2475 nz = 0; 2476 for (i=0; i<m; i++) { 2477 nz += locrowlens[i]; 2478 } 2479 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2480 mycols = ibuf; 2481 /* receive message of column indices*/ 2482 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2483 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2484 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2485 } 2486 2487 /* loop over local rows, determining number of off diagonal entries */ 2488 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2489 odlens = dlens + (rend-rstart); 2490 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2491 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2492 masked1 = mask + Mbs; 2493 masked2 = masked1 + Mbs; 2494 rowcount = 0; nzcount = 0; 2495 for (i=0; i<mbs; i++) { 2496 dcount = 0; 2497 odcount = 0; 2498 for (j=0; j<bs; j++) { 2499 kmax = locrowlens[rowcount]; 2500 for (k=0; k<kmax; k++) { 2501 tmp = mycols[nzcount++]/bs; 2502 if (!mask[tmp]) { 2503 mask[tmp] = 1; 2504 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2505 else masked1[dcount++] = tmp; 2506 } 2507 } 2508 rowcount++; 2509 } 2510 2511 dlens[i] = dcount; 2512 odlens[i] = odcount; 2513 2514 /* zero out the mask elements we set */ 2515 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2516 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2517 } 2518 2519 /* create our matrix */ 2520 ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2521 A = *newmat; 2522 MatSetOption(A,MAT_COLUMNS_SORTED); 2523 2524 if (!rank) { 2525 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2526 /* read in my part of the matrix numerical values */ 2527 nz = 
procsnz[0]; 2528 vals = buf; 2529 mycols = ibuf; 2530 if (size == 1) nz -= extra_rows; 2531 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2532 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2533 2534 /* insert into matrix */ 2535 jj = rstart*bs; 2536 for (i=0; i<m; i++) { 2537 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2538 mycols += locrowlens[i]; 2539 vals += locrowlens[i]; 2540 jj++; 2541 } 2542 /* read in other processors (except the last one) and ship out */ 2543 for (i=1; i<size-1; i++) { 2544 nz = procsnz[i]; 2545 vals = buf; 2546 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2547 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2548 } 2549 /* the last proc */ 2550 if (size != 1){ 2551 nz = procsnz[i] - extra_rows; 2552 vals = buf; 2553 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2554 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2555 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2556 } 2557 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2558 } else { 2559 /* receive numeric values */ 2560 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2561 2562 /* receive message of values*/ 2563 vals = buf; 2564 mycols = ibuf; 2565 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2566 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2567 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2568 2569 /* insert into matrix */ 2570 jj = rstart*bs; 2571 for (i=0; i<m; i++) { 2572 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2573 mycols += locrowlens[i]; 2574 vals += locrowlens[i]; 2575 jj++; 2576 } 2577 } 2578 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2579 ierr = PetscFree(buf);CHKERRQ(ierr); 2580 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2581 ierr = 
PetscFree(rowners);CHKERRQ(ierr); 2582 ierr = PetscFree(dlens);CHKERRQ(ierr); 2583 ierr = PetscFree(mask);CHKERRQ(ierr); 2584 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2585 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 #undef __FUNC__ 2590 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor" 2591 /*@ 2592 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2593 2594 Input Parameters: 2595 . mat - the matrix 2596 . fact - factor 2597 2598 Collective on Mat 2599 2600 Level: advanced 2601 2602 Notes: 2603 This can also be set by the command line option: -mat_use_hash_table fact 2604 2605 .keywords: matrix, hashtable, factor, HT 2606 2607 .seealso: MatSetOption() 2608 @*/ 2609 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2610 { 2611 Mat_MPIBAIJ *baij; 2612 2613 PetscFunctionBegin; 2614 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2615 if (mat->type != MATMPIBAIJ) { 2616 SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2617 } 2618 baij = (Mat_MPIBAIJ*)mat->data; 2619 baij->ht_fact = fact; 2620 PetscFunctionReturn(0); 2621 } 2622