1 2 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 3 4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 15 16 /* UGLY, ugly, ugly 17 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 18 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 19 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 20 converts the entries into single precision and then calls ..._MatScalar() to put them 21 into the single precision data structures. 22 */ 23 #if defined(PETSC_USE_MAT_SINGLE) 24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 #else 30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 31 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 33 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 35 #endif 36 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 40 { 41 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 42 PetscErrorCode ierr; 43 PetscInt i; 44 PetscScalar *va,*vb; 45 Vec vtmp; 46 47 PetscFunctionBegin; 48 49 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 50 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 51 52 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 53 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 54 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 55 56 for (i=0; i<A->m; i++){ 57 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 58 } 59 60 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 61 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 62 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 63 64 PetscFunctionReturn(0); 65 } 66 67 EXTERN_C_BEGIN 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 PetscErrorCode ierr; 110 PetscInt nbs = B->nbs,i,bs=mat->bs; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 120 PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt)); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \ 163 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \ 239 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 \ 243 /* copy over old data into new slots */ \ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatSetValues_MPIBAIJ" 285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt i,N = m*n; 290 MatScalar *vsingle; 291 292 PetscFunctionBegin; 293 if (N > b->setvalueslen) { 294 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 295 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 296 b->setvalueslen = N; 297 } 298 vsingle = b->setvaluescopy; 299 300 for (i=0; i<N; i++) { 301 vsingle[i] = v[i]; 302 } 303 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 310 { 311 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 312 PetscErrorCode ierr; 313 PetscInt i,N = m*n*b->bs2; 314 MatScalar *vsingle; 315 316 PetscFunctionBegin; 317 if (N > b->setvalueslen) { 318 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 319 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 320 b->setvalueslen = N; 321 } 322 vsingle = b->setvaluescopy; 323 for (i=0; i<N; i++) { 324 vsingle[i] = v[i]; 325 } 326 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 333 { 334 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 335 PetscErrorCode ierr; 336 PetscInt i,N = m*n; 337 MatScalar *vsingle; 338 339 PetscFunctionBegin; 340 if (N > b->setvalueslen) { 341 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 342 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 343 b->setvalueslen = N; 344 } 345 vsingle = b->setvaluescopy; 346 for (i=0; i<N; i++) { 347 vsingle[i] = v[i]; 348 } 349 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 356 { 357 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 358 PetscErrorCode ierr; 359 PetscInt i,N = m*n*b->bs2; 360 MatScalar *vsingle; 361 362 PetscFunctionBegin; 363 if (N > b->setvalueslen) { 364 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 365 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 366 b->setvalueslen = N; 367 } 368 vsingle = b->setvaluescopy; 369 for (i=0; i<N; i++) { 370 vsingle[i] = v[i]; 371 } 372 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 373 PetscFunctionReturn(0); 374 } 375 #endif 376 377 #undef __FUNCT__ 378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 380 { 381 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 382 MatScalar value; 383 PetscTruth roworiented = baij->roworiented; 384 PetscErrorCode ierr; 385 PetscInt i,j,row,col; 386 PetscInt rstart_orig=baij->rstart_bs; 387 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 388 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 389 390 /* Some Variables required in the macro */ 391 Mat A = baij->A; 392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 393 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 394 MatScalar *aa=a->a; 395 396 Mat B = baij->B; 397 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 398 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 399 MatScalar *ba=b->a; 400 401 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 402 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 403 MatScalar *ap,*bap; 404 405 PetscFunctionBegin; 406 for (i=0; i<m; i++) { 407 if (im[i] < 0) continue; 408 #if defined(PETSC_USE_BOPT_g) 409 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 410 #endif 411 if (im[i] >= rstart_orig && im[i] < rend_orig) { 412 row = im[i] - rstart_orig; 413 for (j=0; j<n; j++) { 414 if (in[j] >= cstart_orig && in[j] < cend_orig){ 415 col = in[j] - cstart_orig; 416 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 417 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 418 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 419 } else if (in[j] < 0) continue; 420 #if defined(PETSC_USE_BOPT_g) 421 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 422 #endif 423 else { 424 if (mat->was_assembled) { 425 if (!baij->colmap) { 426 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 427 } 428 #if defined (PETSC_USE_CTABLE) 429 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 430 col = col - 1; 431 #else 432 col = baij->colmap[in[j]/bs] - 1; 433 #endif 434 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 435 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 436 col = in[j]; 437 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 438 B = baij->B; 439 b = (Mat_SeqBAIJ*)(B)->data; 440 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 441 ba=b->a; 442 } else col += in[j]%bs; 443 } else col = in[j]; 444 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 445 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 446 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 447 } 448 } 449 } else { 450 if (!baij->donotstash) { 451 if (roworiented) { 452 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 453 } else { 454 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 455 } 456 } 457 } 458 } 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 const MatScalar *value; 468 MatScalar *barray=baij->barray; 469 PetscTruth roworiented = baij->roworiented; 470 PetscErrorCode ierr; 471 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 472 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 473 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 474 475 PetscFunctionBegin; 476 if(!barray) { 477 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 478 baij->barray = barray; 479 } 480 481 if (roworiented) { 482 stepval = (n-1)*bs; 483 } else { 484 stepval = (m-1)*bs; 485 } 486 for (i=0; i<m; i++) { 487 if (im[i] < 0) continue; 488 #if defined(PETSC_USE_BOPT_g) 489 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 if (im[i] >= rstart && im[i] < rend) { 492 row = im[i] - rstart; 493 for (j=0; j<n; j++) { 494 /* If NumCol = 1 then a copy is not required */ 495 if ((roworiented) && (n == 1)) { 496 barray = (MatScalar*)v + i*bs2; 497 } else if((!roworiented) && (m == 1)) { 498 barray = (MatScalar*)v + j*bs2; 499 } else { /* Here a copy is required */ 500 if (roworiented) { 501 value = v + i*(stepval+bs)*bs + j*bs; 502 } else { 503 value = v + j*(stepval+bs)*bs + i*bs; 504 } 505 for (ii=0; ii<bs; ii++,value+=stepval) { 506 for (jj=0; jj<bs; jj++) { 507 *barray++ = *value++; 508 } 509 } 510 barray -=bs2; 511 } 512 513 if (in[j] >= cstart && in[j] < cend){ 514 col = in[j] - cstart; 515 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516 } 517 else if (in[j] < 0) continue; 518 #if defined(PETSC_USE_BOPT_g) 519 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 520 #endif 521 else { 522 if (mat->was_assembled) { 523 if (!baij->colmap) { 524 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 525 } 526 527 #if defined(PETSC_USE_BOPT_g) 528 #if defined (PETSC_USE_CTABLE) 529 { PetscInt data; 530 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 531 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 532 } 533 #else 534 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 535 #endif 536 #endif 537 #if defined (PETSC_USE_CTABLE) 538 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 539 col = (col - 1)/bs; 540 #else 541 col = (baij->colmap[in[j]] - 1)/bs; 542 #endif 543 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 544 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 545 col = in[j]; 546 } 547 } 548 else col = in[j]; 549 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 550 } 551 } 552 } else { 553 if (!baij->donotstash) { 554 if (roworiented) { 555 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 556 } else { 557 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 PetscFunctionReturn(0); 563 } 564 565 #define HASH_KEY 0.6180339887 566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 572 { 573 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 574 PetscTruth roworiented = baij->roworiented; 575 PetscErrorCode ierr; 576 PetscInt i,j,row,col; 577 PetscInt rstart_orig=baij->rstart_bs; 578 PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 579 PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 580 PetscReal tmp; 581 MatScalar **HD = baij->hd,value; 582 #if defined(PETSC_USE_BOPT_g) 583 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 584 #endif 585 586 PetscFunctionBegin; 587 588 for (i=0; i<m; i++) { 589 #if defined(PETSC_USE_BOPT_g) 590 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 591 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 592 #endif 593 row = im[i]; 594 if (row >= rstart_orig && row < rend_orig) { 595 for (j=0; j<n; j++) { 596 col = in[j]; 597 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 598 /* Look up PetscInto the Hash Table */ 599 key = (row/bs)*Nbs+(col/bs)+1; 600 h1 = HASH(size,key,tmp); 601 602 603 idx = h1; 604 #if defined(PETSC_USE_BOPT_g) 605 insert_ct++; 606 total_ct++; 607 if (HT[idx] != key) { 608 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 609 if (idx == size) { 610 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 611 if (idx == h1) { 612 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 613 } 614 } 615 } 616 #else 617 if (HT[idx] != key) { 618 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 619 if (idx == size) { 620 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 621 if (idx == h1) { 622 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 623 } 624 } 625 } 626 #endif 627 /* A HASH table entry is found, so insert the values at the correct address */ 628 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 629 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 630 } 631 } else { 632 if (!baij->donotstash) { 633 if (roworiented) { 634 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 635 } else { 636 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 637 } 638 } 639 } 640 } 641 #if defined(PETSC_USE_BOPT_g) 642 baij->ht_total_ct = total_ct; 643 baij->ht_insert_ct = insert_ct; 644 #endif 645 PetscFunctionReturn(0); 646 } 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 650 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 651 { 652 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 653 PetscTruth roworiented = baij->roworiented; 654 PetscErrorCode ierr; 655 PetscInt i,j,ii,jj,row,col; 656 PetscInt rstart=baij->rstart ; 657 PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 658 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 659 PetscReal tmp; 660 MatScalar **HD = baij->hd,*baij_a; 661 const MatScalar *v_t,*value; 662 #if defined(PETSC_USE_BOPT_g) 663 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 664 #endif 665 666 PetscFunctionBegin; 667 668 if (roworiented) { 669 stepval = (n-1)*bs; 670 } else { 671 stepval = (m-1)*bs; 672 } 673 for (i=0; i<m; i++) { 674 #if defined(PETSC_USE_BOPT_g) 675 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 676 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 677 #endif 678 row = im[i]; 679 v_t = v + i*bs2; 680 if (row >= rstart && row < rend) { 681 for (j=0; j<n; j++) { 682 col = in[j]; 683 684 /* Look up into the Hash Table */ 685 key = row*Nbs+col+1; 686 h1 = HASH(size,key,tmp); 687 688 idx = h1; 689 #if defined(PETSC_USE_BOPT_g) 690 total_ct++; 691 insert_ct++; 692 if (HT[idx] != key) { 693 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 694 if (idx == size) { 695 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 696 if (idx == h1) { 697 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 698 } 699 } 700 } 701 #else 702 if (HT[idx] != key) { 703 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 704 if (idx == size) { 705 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 706 if (idx == h1) { 707 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 708 } 709 } 710 } 711 #endif 712 baij_a = HD[idx]; 713 if (roworiented) { 714 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 715 /* value = v + (i*(stepval+bs)+j)*bs; */ 716 value = v_t; 717 v_t += bs; 718 if (addv == ADD_VALUES) { 719 for (ii=0; ii<bs; ii++,value+=stepval) { 720 for (jj=ii; jj<bs2; jj+=bs) { 721 baij_a[jj] += *value++; 722 } 723 } 724 } else { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 baij_a[jj] = *value++; 728 } 729 } 730 } 731 } else { 732 value = v + j*(stepval+bs)*bs + i*bs; 733 if (addv == ADD_VALUES) { 734 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 735 for (jj=0; jj<bs; jj++) { 736 baij_a[jj] += *value++; 737 } 738 } 739 } else { 740 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 741 for (jj=0; jj<bs; jj++) { 742 baij_a[jj] = *value++; 743 } 744 } 745 } 746 } 747 } 748 } else { 749 if (!baij->donotstash) { 750 if (roworiented) { 751 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 752 } else { 753 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 754 } 755 } 756 } 757 } 758 #if defined(PETSC_USE_BOPT_g) 759 baij->ht_total_ct = total_ct; 760 baij->ht_insert_ct = insert_ct; 761 #endif 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatGetValues_MPIBAIJ" 767 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 768 { 769 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 770 PetscErrorCode ierr; 771 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 772 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 773 774 PetscFunctionBegin; 775 for (i=0; i<m; i++) { 776 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 777 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 778 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 779 row = idxm[i] - bsrstart; 780 for (j=0; j<n; j++) { 781 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 782 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 783 if (idxn[j] >= bscstart && idxn[j] < bscend){ 784 col = idxn[j] - bscstart; 785 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786 } else { 787 if (!baij->colmap) { 788 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 789 } 790 #if defined (PETSC_USE_CTABLE) 791 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 792 data --; 793 #else 794 data = baij->colmap[idxn[j]/bs]-1; 795 #endif 796 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 797 else { 798 col = data + idxn[j]%bs; 799 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 800 } 801 } 802 } 803 } else { 804 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 805 } 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatNorm_MPIBAIJ" 812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 813 { 814 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 815 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 816 PetscErrorCode ierr; 817 PetscInt i,bs2=baij->bs2; 818 PetscReal sum = 0.0; 819 MatScalar *v; 820 821 PetscFunctionBegin; 822 if (baij->size == 1) { 823 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 824 } else { 825 if (type == NORM_FROBENIUS) { 826 v = amat->a; 827 for (i=0; i<amat->nz*bs2; i++) { 828 #if defined(PETSC_USE_COMPLEX) 829 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 830 #else 831 sum += (*v)*(*v); v++; 832 #endif 833 } 834 v = bmat->a; 835 for (i=0; i<bmat->nz*bs2; i++) { 836 #if defined(PETSC_USE_COMPLEX) 837 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 838 #else 839 sum += (*v)*(*v); v++; 840 #endif 841 } 842 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 843 *nrm = sqrt(*nrm); 844 } else { 845 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 846 } 847 } 848 PetscFunctionReturn(0); 849 } 850 851 852 /* 853 Creates the hash table, and sets the table 854 This table is created only once. 855 If new entried need to be added to the matrix 856 then the hash table has to be destroyed and 857 recreated. 858 */ 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 862 { 863 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 864 Mat A = baij->A,B=baij->B; 865 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 866 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 867 PetscErrorCode ierr; 868 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 869 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 870 PetscInt *HT,key; 871 MatScalar **HD; 872 PetscReal tmp; 873 #if defined(PETSC_USE_BOPT_g) 874 PetscInt ct=0,max=0; 875 #endif 876 877 PetscFunctionBegin; 878 baij->ht_size=(PetscInt)(factor*nz); 879 size = baij->ht_size; 880 881 if (baij->ht) { 882 PetscFunctionReturn(0); 883 } 884 885 /* Allocate Memory for Hash Table */ 886 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 887 baij->ht = (PetscInt*)(baij->hd + size); 888 HD = baij->hd; 889 HT = baij->ht; 890 891 892 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 893 894 895 /* Loop Over A */ 896 for (i=0; i<a->mbs; i++) { 897 for (j=ai[i]; j<ai[i+1]; j++) { 898 row = i+rstart; 899 col = aj[j]+cstart; 900 901 key = row*Nbs + col + 1; 902 h1 = HASH(size,key,tmp); 903 for (k=0; k<size; k++){ 904 if (!HT[(h1+k)%size]) { 905 HT[(h1+k)%size] = key; 906 HD[(h1+k)%size] = a->a + j*bs2; 907 break; 908 #if defined(PETSC_USE_BOPT_g) 909 } else { 910 ct++; 911 #endif 912 } 913 } 914 #if defined(PETSC_USE_BOPT_g) 915 if (k> max) max = k; 916 #endif 917 } 918 } 919 /* Loop Over B */ 920 for (i=0; i<b->mbs; i++) { 921 for (j=bi[i]; j<bi[i+1]; j++) { 922 row = i+rstart; 923 col = garray[bj[j]]; 924 key = row*Nbs + col + 1; 925 h1 = HASH(size,key,tmp); 926 for (k=0; k<size; k++){ 927 if (!HT[(h1+k)%size]) { 928 HT[(h1+k)%size] = key; 929 HD[(h1+k)%size] = b->a + j*bs2; 930 break; 931 #if defined(PETSC_USE_BOPT_g) 932 } else { 933 ct++; 934 #endif 935 } 936 } 937 #if defined(PETSC_USE_BOPT_g) 938 if (k> max) max = k; 939 #endif 940 } 941 } 942 943 /* Print Summary */ 944 #if defined(PETSC_USE_BOPT_g) 945 for (i=0,j=0; i<size; i++) { 946 if (HT[i]) {j++;} 947 } 948 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max); 949 #endif 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 955 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 956 { 957 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 958 PetscErrorCode ierr; 959 PetscInt nstash,reallocs; 960 InsertMode addv; 961 962 PetscFunctionBegin; 963 if (baij->donotstash) { 964 PetscFunctionReturn(0); 965 } 966 967 /* make sure all processors are either in INSERTMODE or ADDMODE */ 968 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 969 if (addv == (ADD_VALUES|INSERT_VALUES)) { 970 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 971 } 972 mat->insertmode = addv; /* in case this processor had no cache */ 973 974 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 975 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 976 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 977 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs); 978 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 979 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 985 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 986 { 987 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 988 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 989 PetscErrorCode ierr; 990 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 991 PetscInt *row,*col,other_disassembled; 992 PetscTruth r1,r2,r3; 993 MatScalar *val; 994 InsertMode addv = mat->insertmode; 995 PetscMPIInt n; 996 997 PetscFunctionBegin; 998 if (!baij->donotstash) { 999 while (1) { 1000 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1001 if (!flg) break; 1002 1003 for (i=0; i<n;) { 1004 /* Now identify the consecutive vals belonging to the same row */ 1005 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1006 if (j < n) ncols = j-i; 1007 else ncols = n-i; 1008 /* Now assemble all these values with a single function call */ 1009 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1010 i = j; 1011 } 1012 } 1013 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1014 /* Now process the block-stash. Since the values are stashed column-oriented, 1015 set the roworiented flag to column oriented, and after MatSetValues() 1016 restore the original flags */ 1017 r1 = baij->roworiented; 1018 r2 = a->roworiented; 1019 r3 = b->roworiented; 1020 baij->roworiented = PETSC_FALSE; 1021 a->roworiented = PETSC_FALSE; 1022 b->roworiented = PETSC_FALSE; 1023 while (1) { 1024 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1025 if (!flg) break; 1026 1027 for (i=0; i<n;) { 1028 /* Now identify the consecutive vals belonging to the same row */ 1029 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1030 if (j < n) ncols = j-i; 1031 else ncols = n-i; 1032 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1033 i = j; 1034 } 1035 } 1036 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1037 baij->roworiented = r1; 1038 a->roworiented = r2; 1039 b->roworiented = r3; 1040 } 1041 1042 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1043 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1044 1045 /* determine if any processor has disassembled, if so we must 1046 also disassemble ourselfs, in order that we may reassemble. */ 1047 /* 1048 if nonzero structure of submatrix B cannot change then we know that 1049 no processor disassembled thus we can skip this stuff 1050 */ 1051 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1052 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1053 if (mat->was_assembled && !other_disassembled) { 1054 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1055 } 1056 } 1057 1058 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1059 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1060 } 1061 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1062 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1063 1064 #if defined(PETSC_USE_BOPT_g) 1065 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1066 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1067 baij->ht_total_ct = 0; 1068 baij->ht_insert_ct = 0; 1069 } 1070 #endif 1071 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1072 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1073 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1074 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1075 } 1076 1077 if (baij->rowvalues) { 1078 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1079 baij->rowvalues = 0; 1080 } 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1086 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1087 { 1088 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1089 PetscErrorCode ierr; 1090 PetscMPIInt size = baij->size,rank = baij->rank; 1091 PetscInt bs = mat->bs; 1092 PetscTruth iascii,isdraw; 1093 PetscViewer sviewer; 1094 PetscViewerFormat format; 1095 1096 PetscFunctionBegin; 1097 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1098 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1099 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1100 if (iascii) { 1101 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1102 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1103 MatInfo info; 1104 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1105 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1106 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1107 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1108 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1109 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1110 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1111 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1112 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1113 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1114 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1117 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1120 PetscFunctionReturn(0); 1121 } 1122 } 1123 1124 if (isdraw) { 1125 PetscDraw draw; 1126 PetscTruth isnull; 1127 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1128 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1129 } 1130 1131 if (size == 1) { 1132 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1133 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1134 } else { 1135 /* assemble the entire matrix onto first processor. */ 1136 Mat A; 1137 Mat_SeqBAIJ *Aloc; 1138 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1139 MatScalar *a; 1140 1141 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1142 /* Perhaps this should be the type of mat? */ 1143 if (!rank) { 1144 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1145 } else { 1146 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1147 } 1148 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1149 ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1150 PetscLogObjectParent(mat,A); 1151 1152 /* copy over the A part */ 1153 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1154 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1155 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1156 1157 for (i=0; i<mbs; i++) { 1158 rvals[0] = bs*(baij->rstart + i); 1159 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1160 for (j=ai[i]; j<ai[i+1]; j++) { 1161 col = (baij->cstart+aj[j])*bs; 1162 for (k=0; k<bs; k++) { 1163 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1164 col++; a += bs; 1165 } 1166 } 1167 } 1168 /* copy over the B part */ 1169 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1170 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1171 for (i=0; i<mbs; i++) { 1172 rvals[0] = bs*(baij->rstart + i); 1173 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1174 for (j=ai[i]; j<ai[i+1]; j++) { 1175 col = baij->garray[aj[j]]*bs; 1176 for (k=0; k<bs; k++) { 1177 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1178 col++; a += bs; 1179 } 1180 } 1181 } 1182 ierr = PetscFree(rvals);CHKERRQ(ierr); 1183 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1184 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1185 /* 1186 Everyone has to call to draw the matrix since the graphics waits are 1187 synchronized across all processors that share the PetscDraw object 1188 */ 1189 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1190 if (!rank) { 1191 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1192 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1193 } 1194 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1195 ierr = MatDestroy(A);CHKERRQ(ierr); 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatView_MPIBAIJ" 1202 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1203 { 1204 PetscErrorCode ierr; 1205 PetscTruth iascii,isdraw,issocket,isbinary; 1206 1207 PetscFunctionBegin; 1208 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1209 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1210 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1211 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1212 if (iascii || isdraw || issocket || isbinary) { 1213 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1214 } else { 1215 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1216 } 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1222 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1223 { 1224 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1225 PetscErrorCode ierr; 1226 1227 PetscFunctionBegin; 1228 #if defined(PETSC_USE_LOG) 1229 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1230 #endif 1231 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1232 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1233 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1234 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1235 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1236 #if defined (PETSC_USE_CTABLE) 1237 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1238 #else 1239 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1240 #endif 1241 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1242 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1243 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1244 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1245 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1246 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1247 #if defined(PETSC_USE_MAT_SINGLE) 1248 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1249 #endif 1250 ierr = PetscFree(baij);CHKERRQ(ierr); 1251 1252 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1253 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1254 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1255 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1256 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1257 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1258 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatMult_MPIBAIJ" 1264 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1265 { 1266 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1267 PetscErrorCode ierr; 1268 PetscInt nt; 1269 1270 PetscFunctionBegin; 1271 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1272 if (nt != A->n) { 1273 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1274 } 1275 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1276 if (nt != A->m) { 1277 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1278 } 1279 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1280 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1281 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1282 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1283 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1289 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1290 { 1291 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1292 PetscErrorCode ierr; 1293 1294 PetscFunctionBegin; 1295 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1296 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1297 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1298 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1304 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1305 { 1306 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1307 PetscErrorCode ierr; 1308 1309 PetscFunctionBegin; 1310 /* do nondiagonal part */ 1311 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1312 /* send it on its way */ 1313 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1314 /* do local part */ 1315 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1316 /* receive remote parts: note this assumes the values are not actually */ 1317 /* inserted in yy until the next line, which is true for my implementation*/ 1318 /* but is not perhaps always true. */ 1319 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1325 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1326 { 1327 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1328 PetscErrorCode ierr; 1329 1330 PetscFunctionBegin; 1331 /* do nondiagonal part */ 1332 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1333 /* send it on its way */ 1334 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1335 /* do local part */ 1336 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1337 /* receive remote parts: note this assumes the values are not actually */ 1338 /* inserted in yy until the next line, which is true for my implementation*/ 1339 /* but is not perhaps always true. */ 1340 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 /* 1345 This only works correctly for square matrices where the subblock A->A is the 1346 diagonal block 1347 */ 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1350 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1351 { 1352 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1353 PetscErrorCode ierr; 1354 1355 PetscFunctionBegin; 1356 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1357 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatScale_MPIBAIJ" 1363 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1364 { 1365 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1366 PetscErrorCode ierr; 1367 1368 PetscFunctionBegin; 1369 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1370 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1376 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1377 { 1378 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1379 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1380 PetscErrorCode ierr; 1381 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1382 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1383 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1384 1385 PetscFunctionBegin; 1386 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1387 mat->getrowactive = PETSC_TRUE; 1388 1389 if (!mat->rowvalues && (idx || v)) { 1390 /* 1391 allocate enough space to hold information from the longest row. 1392 */ 1393 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1394 PetscInt max = 1,mbs = mat->mbs,tmp; 1395 for (i=0; i<mbs; i++) { 1396 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1397 if (max < tmp) { max = tmp; } 1398 } 1399 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1400 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1401 } 1402 1403 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1404 lrow = row - brstart; 1405 1406 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1407 if (!v) {pvA = 0; pvB = 0;} 1408 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1409 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1410 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1411 nztot = nzA + nzB; 1412 1413 cmap = mat->garray; 1414 if (v || idx) { 1415 if (nztot) { 1416 /* Sort by increasing column numbers, assuming A and B already sorted */ 1417 PetscInt imark = -1; 1418 if (v) { 1419 *v = v_p = mat->rowvalues; 1420 for (i=0; i<nzB; i++) { 1421 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1422 else break; 1423 } 1424 imark = i; 1425 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1426 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1427 } 1428 if (idx) { 1429 *idx = idx_p = mat->rowindices; 1430 if (imark > -1) { 1431 for (i=0; i<imark; i++) { 1432 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1433 } 1434 } else { 1435 for (i=0; i<nzB; i++) { 1436 if (cmap[cworkB[i]/bs] < cstart) 1437 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1438 else break; 1439 } 1440 imark = i; 1441 } 1442 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1443 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1444 } 1445 } else { 1446 if (idx) *idx = 0; 1447 if (v) *v = 0; 1448 } 1449 } 1450 *nz = nztot; 1451 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1452 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1453 PetscFunctionReturn(0); 1454 } 1455 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1458 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1459 { 1460 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1461 1462 PetscFunctionBegin; 1463 if (baij->getrowactive == PETSC_FALSE) { 1464 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1465 } 1466 baij->getrowactive = PETSC_FALSE; 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1472 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1473 { 1474 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1475 PetscErrorCode ierr; 1476 1477 PetscFunctionBegin; 1478 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1479 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1485 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1486 { 1487 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1488 Mat A = a->A,B = a->B; 1489 PetscErrorCode ierr; 1490 PetscReal isend[5],irecv[5]; 1491 1492 PetscFunctionBegin; 1493 info->block_size = (PetscReal)matin->bs; 1494 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1495 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1496 isend[3] = info->memory; isend[4] = info->mallocs; 1497 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1498 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1499 isend[3] += info->memory; isend[4] += info->mallocs; 1500 if (flag == MAT_LOCAL) { 1501 info->nz_used = isend[0]; 1502 info->nz_allocated = isend[1]; 1503 info->nz_unneeded = isend[2]; 1504 info->memory = isend[3]; 1505 info->mallocs = isend[4]; 1506 } else if (flag == MAT_GLOBAL_MAX) { 1507 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1508 info->nz_used = irecv[0]; 1509 info->nz_allocated = irecv[1]; 1510 info->nz_unneeded = irecv[2]; 1511 info->memory = irecv[3]; 1512 info->mallocs = irecv[4]; 1513 } else if (flag == MAT_GLOBAL_SUM) { 1514 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1515 info->nz_used = irecv[0]; 1516 info->nz_allocated = irecv[1]; 1517 info->nz_unneeded = irecv[2]; 1518 info->memory = irecv[3]; 1519 info->mallocs = irecv[4]; 1520 } else { 1521 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1522 } 1523 info->rows_global = (PetscReal)A->M; 1524 info->columns_global = (PetscReal)A->N; 1525 info->rows_local = (PetscReal)A->m; 1526 info->columns_local = (PetscReal)A->N; 1527 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1528 info->fill_ratio_needed = 0; 1529 info->factor_mallocs = 0; 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1535 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1536 { 1537 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 switch (op) { 1542 case MAT_NO_NEW_NONZERO_LOCATIONS: 1543 case MAT_YES_NEW_NONZERO_LOCATIONS: 1544 case MAT_COLUMNS_UNSORTED: 1545 case MAT_COLUMNS_SORTED: 1546 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1547 case MAT_KEEP_ZEROED_ROWS: 1548 case MAT_NEW_NONZERO_LOCATION_ERR: 1549 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1550 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1551 break; 1552 case MAT_ROW_ORIENTED: 1553 a->roworiented = PETSC_TRUE; 1554 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1555 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1556 break; 1557 case MAT_ROWS_SORTED: 1558 case MAT_ROWS_UNSORTED: 1559 case MAT_YES_NEW_DIAGONALS: 1560 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1561 break; 1562 case MAT_COLUMN_ORIENTED: 1563 a->roworiented = PETSC_FALSE; 1564 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1565 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1566 break; 1567 case MAT_IGNORE_OFF_PROC_ENTRIES: 1568 a->donotstash = PETSC_TRUE; 1569 break; 1570 case MAT_NO_NEW_DIAGONALS: 1571 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1572 case MAT_USE_HASH_TABLE: 1573 a->ht_flag = PETSC_TRUE; 1574 break; 1575 case MAT_SYMMETRIC: 1576 case MAT_STRUCTURALLY_SYMMETRIC: 1577 case MAT_HERMITIAN: 1578 case MAT_SYMMETRY_ETERNAL: 1579 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1580 break; 1581 case MAT_NOT_SYMMETRIC: 1582 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1583 case MAT_NOT_HERMITIAN: 1584 case MAT_NOT_SYMMETRY_ETERNAL: 1585 break; 1586 default: 1587 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1588 } 1589 PetscFunctionReturn(0); 1590 } 1591 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1594 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1595 { 1596 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1597 Mat_SeqBAIJ *Aloc; 1598 Mat B; 1599 PetscErrorCode ierr; 1600 PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1601 PetscInt bs=A->bs,mbs=baij->mbs; 1602 MatScalar *a; 1603 1604 PetscFunctionBegin; 1605 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1606 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1607 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1608 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1609 1610 /* copy over the A part */ 1611 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1612 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1613 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1614 1615 for (i=0; i<mbs; i++) { 1616 rvals[0] = bs*(baij->rstart + i); 1617 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1618 for (j=ai[i]; j<ai[i+1]; j++) { 1619 col = (baij->cstart+aj[j])*bs; 1620 for (k=0; k<bs; k++) { 1621 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1622 col++; a += bs; 1623 } 1624 } 1625 } 1626 /* copy over the B part */ 1627 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1628 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1629 for (i=0; i<mbs; i++) { 1630 rvals[0] = bs*(baij->rstart + i); 1631 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1632 for (j=ai[i]; j<ai[i+1]; j++) { 1633 col = baij->garray[aj[j]]*bs; 1634 for (k=0; k<bs; k++) { 1635 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1636 col++; a += bs; 1637 } 1638 } 1639 } 1640 ierr = PetscFree(rvals);CHKERRQ(ierr); 1641 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 1644 if (matout) { 1645 *matout = B; 1646 } else { 1647 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1648 } 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1654 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1655 { 1656 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1657 Mat a = baij->A,b = baij->B; 1658 PetscErrorCode ierr; 1659 PetscInt s1,s2,s3; 1660 1661 PetscFunctionBegin; 1662 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1663 if (rr) { 1664 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1665 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1666 /* Overlap communication with computation. */ 1667 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1668 } 1669 if (ll) { 1670 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1671 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1672 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1673 } 1674 /* scale the diagonal block */ 1675 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1676 1677 if (rr) { 1678 /* Do a scatter end and then right scale the off-diagonal block */ 1679 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1680 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1681 } 1682 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1688 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1689 { 1690 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1693 PetscInt i,N,*rows,*owners = l->rowners; 1694 PetscInt *nprocs,j,idx,nsends,row; 1695 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1696 PetscInt *rvalues,tag = A->tag,count,base,slen,*source; 1697 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1698 MPI_Comm comm = A->comm; 1699 MPI_Request *send_waits,*recv_waits; 1700 MPI_Status recv_status,*send_status; 1701 IS istmp; 1702 PetscTruth found; 1703 1704 PetscFunctionBegin; 1705 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1706 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1707 1708 /* first count number of contributors to each processor */ 1709 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1710 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1711 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1712 for (i=0; i<N; i++) { 1713 idx = rows[i]; 1714 found = PETSC_FALSE; 1715 for (j=0; j<size; j++) { 1716 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1717 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 1718 } 1719 } 1720 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1721 } 1722 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1723 1724 /* inform other processors of number of messages and max length*/ 1725 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1726 1727 /* post receives: */ 1728 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1729 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1730 for (i=0; i<nrecvs; i++) { 1731 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1732 } 1733 1734 /* do sends: 1735 1) starts[i] gives the starting index in svalues for stuff going to 1736 the ith processor 1737 */ 1738 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1739 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1740 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1741 starts[0] = 0; 1742 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1743 for (i=0; i<N; i++) { 1744 svalues[starts[owner[i]]++] = rows[i]; 1745 } 1746 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1747 1748 starts[0] = 0; 1749 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1750 count = 0; 1751 for (i=0; i<size; i++) { 1752 if (nprocs[2*i+1]) { 1753 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1754 } 1755 } 1756 ierr = PetscFree(starts);CHKERRQ(ierr); 1757 1758 base = owners[rank]*bs; 1759 1760 /* wait on receives */ 1761 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1762 source = lens + nrecvs; 1763 count = nrecvs; slen = 0; 1764 while (count) { 1765 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1766 /* unpack receives into our local space */ 1767 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1768 source[imdex] = recv_status.MPI_SOURCE; 1769 lens[imdex] = n; 1770 slen += n; 1771 count--; 1772 } 1773 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1774 1775 /* move the data into the send scatter */ 1776 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1777 count = 0; 1778 for (i=0; i<nrecvs; i++) { 1779 values = rvalues + i*nmax; 1780 for (j=0; j<lens[i]; j++) { 1781 lrows[count++] = values[j] - base; 1782 } 1783 } 1784 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1785 ierr = PetscFree(lens);CHKERRQ(ierr); 1786 ierr = PetscFree(owner);CHKERRQ(ierr); 1787 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1788 1789 /* actually zap the local rows */ 1790 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1791 PetscLogObjectParent(A,istmp); 1792 1793 /* 1794 Zero the required rows. If the "diagonal block" of the matrix 1795 is square and the user wishes to set the diagonal we use seperate 1796 code so that MatSetValues() is not called for each diagonal allocating 1797 new memory, thus calling lots of mallocs and slowing things down. 1798 1799 Contributed by: Mathew Knepley 1800 */ 1801 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1802 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1803 if (diag && (l->A->M == l->A->N)) { 1804 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1805 } else if (diag) { 1806 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1807 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1808 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1809 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1810 } 1811 for (i=0; i<slen; i++) { 1812 row = lrows[i] + rstart_bs; 1813 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1814 } 1815 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1816 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1817 } else { 1818 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1819 } 1820 1821 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1822 ierr = PetscFree(lrows);CHKERRQ(ierr); 1823 1824 /* wait on sends */ 1825 if (nsends) { 1826 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1827 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1828 ierr = PetscFree(send_status);CHKERRQ(ierr); 1829 } 1830 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1831 ierr = PetscFree(svalues);CHKERRQ(ierr); 1832 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1838 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1839 { 1840 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1841 MPI_Comm comm = A->comm; 1842 static PetscTruth called = PETSC_FALSE; 1843 PetscErrorCode ierr; 1844 1845 PetscFunctionBegin; 1846 if (!a->rank) { 1847 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1848 } 1849 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1850 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1851 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1852 PetscFunctionReturn(0); 1853 } 1854 1855 #undef __FUNCT__ 1856 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1857 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1858 { 1859 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1860 PetscErrorCode ierr; 1861 1862 PetscFunctionBegin; 1863 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1864 PetscFunctionReturn(0); 1865 } 1866 1867 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatEqual_MPIBAIJ" 1871 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1872 { 1873 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1874 Mat a,b,c,d; 1875 PetscTruth flg; 1876 PetscErrorCode ierr; 1877 1878 PetscFunctionBegin; 1879 a = matA->A; b = matA->B; 1880 c = matB->A; d = matB->B; 1881 1882 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1883 if (flg == PETSC_TRUE) { 1884 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1885 } 1886 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1887 PetscFunctionReturn(0); 1888 } 1889 1890 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1893 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1894 { 1895 PetscErrorCode ierr; 1896 1897 PetscFunctionBegin; 1898 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1899 PetscFunctionReturn(0); 1900 } 1901 1902 /* -------------------------------------------------------------------*/ 1903 static struct _MatOps MatOps_Values = { 1904 MatSetValues_MPIBAIJ, 1905 MatGetRow_MPIBAIJ, 1906 MatRestoreRow_MPIBAIJ, 1907 MatMult_MPIBAIJ, 1908 /* 4*/ MatMultAdd_MPIBAIJ, 1909 MatMultTranspose_MPIBAIJ, 1910 MatMultTransposeAdd_MPIBAIJ, 1911 0, 1912 0, 1913 0, 1914 /*10*/ 0, 1915 0, 1916 0, 1917 0, 1918 MatTranspose_MPIBAIJ, 1919 /*15*/ MatGetInfo_MPIBAIJ, 1920 MatEqual_MPIBAIJ, 1921 MatGetDiagonal_MPIBAIJ, 1922 MatDiagonalScale_MPIBAIJ, 1923 MatNorm_MPIBAIJ, 1924 /*20*/ MatAssemblyBegin_MPIBAIJ, 1925 MatAssemblyEnd_MPIBAIJ, 1926 0, 1927 MatSetOption_MPIBAIJ, 1928 MatZeroEntries_MPIBAIJ, 1929 /*25*/ MatZeroRows_MPIBAIJ, 1930 0, 1931 0, 1932 0, 1933 0, 1934 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1935 0, 1936 0, 1937 0, 1938 0, 1939 /*35*/ MatDuplicate_MPIBAIJ, 1940 0, 1941 0, 1942 0, 1943 0, 1944 /*40*/ 0, 1945 MatGetSubMatrices_MPIBAIJ, 1946 MatIncreaseOverlap_MPIBAIJ, 1947 MatGetValues_MPIBAIJ, 1948 0, 1949 /*45*/ MatPrintHelp_MPIBAIJ, 1950 MatScale_MPIBAIJ, 1951 0, 1952 0, 1953 0, 1954 /*50*/ 0, 1955 0, 1956 0, 1957 0, 1958 0, 1959 /*55*/ 0, 1960 0, 1961 MatSetUnfactored_MPIBAIJ, 1962 0, 1963 MatSetValuesBlocked_MPIBAIJ, 1964 /*60*/ 0, 1965 MatDestroy_MPIBAIJ, 1966 MatView_MPIBAIJ, 1967 MatGetPetscMaps_Petsc, 1968 0, 1969 /*65*/ 0, 1970 0, 1971 0, 1972 0, 1973 0, 1974 /*70*/ MatGetRowMax_MPIBAIJ, 1975 0, 1976 0, 1977 0, 1978 0, 1979 /*75*/ 0, 1980 0, 1981 0, 1982 0, 1983 0, 1984 /*80*/ 0, 1985 0, 1986 0, 1987 0, 1988 MatLoad_MPIBAIJ, 1989 /*85*/ 0, 1990 0, 1991 0, 1992 0, 1993 0, 1994 /*90*/ 0, 1995 0, 1996 0, 1997 0, 1998 0, 1999 /*95*/ 0, 2000 0, 2001 0, 2002 0}; 2003 2004 2005 EXTERN_C_BEGIN 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2008 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2009 { 2010 PetscFunctionBegin; 2011 *a = ((Mat_MPIBAIJ *)A->data)->A; 2012 *iscopy = PETSC_FALSE; 2013 PetscFunctionReturn(0); 2014 } 2015 EXTERN_C_END 2016 2017 EXTERN_C_BEGIN 2018 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*); 2019 EXTERN_C_END 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2023 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2024 { 2025 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2026 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2027 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2028 const PetscInt *JJ; 2029 PetscScalar *values; 2030 PetscErrorCode ierr; 2031 2032 PetscFunctionBegin; 2033 #if defined(PETSC_OPT_g) 2034 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2035 #endif 2036 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2037 o_nnz = d_nnz + m; 2038 2039 for (i=0; i<m; i++) { 2040 nnz = I[i+1]- I[i]; 2041 JJ = J + I[i]; 2042 nnz_max = PetscMax(nnz_max,nnz); 2043 #if defined(PETSC_OPT_g) 2044 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2045 #endif 2046 for (j=0; j<nnz; j++) { 2047 if (*JJ >= cstart) break; 2048 JJ++; 2049 } 2050 d = 0; 2051 for (; j<nnz; j++) { 2052 if (*JJ++ >= cend) break; 2053 d++; 2054 } 2055 d_nnz[i] = d; 2056 o_nnz[i] = nnz - d; 2057 } 2058 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2059 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2060 2061 if (v) values = (PetscScalar*)v; 2062 else { 2063 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2064 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2065 } 2066 2067 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2068 for (i=0; i<m; i++) { 2069 ii = i + rstart; 2070 nnz = I[i+1]- I[i]; 2071 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2072 } 2073 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2074 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2075 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2076 2077 if (!v) { 2078 ierr = PetscFree(values);CHKERRQ(ierr); 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2085 /*@C 2086 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2087 (the default parallel PETSc format). 2088 2089 Collective on MPI_Comm 2090 2091 Input Parameters: 2092 + A - the matrix 2093 . i - the indices into j for the start of each local row (starts with zero) 2094 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2095 - v - optional values in the matrix 2096 2097 Level: developer 2098 2099 .keywords: matrix, aij, compressed row, sparse, parallel 2100 2101 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2102 @*/ 2103 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2104 { 2105 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2106 2107 PetscFunctionBegin; 2108 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2109 if (f) { 2110 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2111 } 2112 PetscFunctionReturn(0); 2113 } 2114 2115 EXTERN_C_BEGIN 2116 #undef __FUNCT__ 2117 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2118 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2119 { 2120 Mat_MPIBAIJ *b; 2121 PetscErrorCode ierr; 2122 PetscInt i; 2123 2124 PetscFunctionBegin; 2125 B->preallocated = PETSC_TRUE; 2126 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2127 2128 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2129 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2130 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2131 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2132 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2133 if (d_nnz) { 2134 for (i=0; i<B->m/bs; i++) { 2135 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2136 } 2137 } 2138 if (o_nnz) { 2139 for (i=0; i<B->m/bs; i++) { 2140 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2141 } 2142 } 2143 2144 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2145 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2146 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2147 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2148 2149 b = (Mat_MPIBAIJ*)B->data; 2150 B->bs = bs; 2151 b->bs2 = bs*bs; 2152 b->mbs = B->m/bs; 2153 b->nbs = B->n/bs; 2154 b->Mbs = B->M/bs; 2155 b->Nbs = B->N/bs; 2156 2157 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2158 b->rowners[0] = 0; 2159 for (i=2; i<=b->size; i++) { 2160 b->rowners[i] += b->rowners[i-1]; 2161 } 2162 b->rstart = b->rowners[b->rank]; 2163 b->rend = b->rowners[b->rank+1]; 2164 2165 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2166 b->cowners[0] = 0; 2167 for (i=2; i<=b->size; i++) { 2168 b->cowners[i] += b->cowners[i-1]; 2169 } 2170 b->cstart = b->cowners[b->rank]; 2171 b->cend = b->cowners[b->rank+1]; 2172 2173 for (i=0; i<=b->size; i++) { 2174 b->rowners_bs[i] = b->rowners[i]*bs; 2175 } 2176 b->rstart_bs = b->rstart*bs; 2177 b->rend_bs = b->rend*bs; 2178 b->cstart_bs = b->cstart*bs; 2179 b->cend_bs = b->cend*bs; 2180 2181 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2182 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2183 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2184 PetscLogObjectParent(B,b->A); 2185 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2186 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2187 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2188 PetscLogObjectParent(B,b->B); 2189 2190 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2191 2192 PetscFunctionReturn(0); 2193 } 2194 EXTERN_C_END 2195 2196 EXTERN_C_BEGIN 2197 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2198 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2199 EXTERN_C_END 2200 2201 /*MC 2202 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2203 2204 Options Database Keys: 2205 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2206 2207 Level: beginner 2208 2209 .seealso: MatCreateMPIBAIJ 2210 M*/ 2211 2212 EXTERN_C_BEGIN 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatCreate_MPIBAIJ" 2215 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2216 { 2217 Mat_MPIBAIJ *b; 2218 PetscErrorCode ierr; 2219 PetscTruth flg; 2220 2221 PetscFunctionBegin; 2222 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2223 B->data = (void*)b; 2224 2225 ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2226 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2227 B->mapping = 0; 2228 B->factor = 0; 2229 B->assembled = PETSC_FALSE; 2230 2231 B->insertmode = NOT_SET_VALUES; 2232 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2233 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2234 2235 /* build local table of row and column ownerships */ 2236 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2237 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 2238 b->cowners = b->rowners + b->size + 2; 2239 b->rowners_bs = b->cowners + b->size + 2; 2240 2241 /* build cache for off array entries formed */ 2242 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2243 b->donotstash = PETSC_FALSE; 2244 b->colmap = PETSC_NULL; 2245 b->garray = PETSC_NULL; 2246 b->roworiented = PETSC_TRUE; 2247 2248 #if defined(PETSC_USE_MAT_SINGLE) 2249 /* stuff for MatSetValues_XXX in single precision */ 2250 b->setvalueslen = 0; 2251 b->setvaluescopy = PETSC_NULL; 2252 #endif 2253 2254 /* stuff used in block assembly */ 2255 b->barray = 0; 2256 2257 /* stuff used for matrix vector multiply */ 2258 b->lvec = 0; 2259 b->Mvctx = 0; 2260 2261 /* stuff for MatGetRow() */ 2262 b->rowindices = 0; 2263 b->rowvalues = 0; 2264 b->getrowactive = PETSC_FALSE; 2265 2266 /* hash table stuff */ 2267 b->ht = 0; 2268 b->hd = 0; 2269 b->ht_size = 0; 2270 b->ht_flag = PETSC_FALSE; 2271 b->ht_fact = 0; 2272 b->ht_total_ct = 0; 2273 b->ht_insert_ct = 0; 2274 2275 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2276 if (flg) { 2277 PetscReal fact = 1.39; 2278 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2279 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2280 if (fact <= 1.0) fact = 1.39; 2281 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2282 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2283 } 2284 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2285 "MatStoreValues_MPIBAIJ", 2286 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2287 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2288 "MatRetrieveValues_MPIBAIJ", 2289 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2290 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2291 "MatGetDiagonalBlock_MPIBAIJ", 2292 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2293 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2294 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2295 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2296 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2297 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2298 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2299 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2300 "MatDiagonalScaleLocal_MPIBAIJ", 2301 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2302 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2303 "MatSetHashTableFactor_MPIBAIJ", 2304 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 EXTERN_C_END 2308 2309 /*MC 2310 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2311 2312 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2313 and MATMPIBAIJ otherwise. 2314 2315 Options Database Keys: 2316 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2317 2318 Level: beginner 2319 2320 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2321 M*/ 2322 2323 EXTERN_C_BEGIN 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatCreate_BAIJ" 2326 PetscErrorCode MatCreate_BAIJ(Mat A) 2327 { 2328 PetscErrorCode ierr; 2329 PetscMPIInt size; 2330 2331 PetscFunctionBegin; 2332 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2333 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2334 if (size == 1) { 2335 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2336 } else { 2337 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2338 } 2339 PetscFunctionReturn(0); 2340 } 2341 EXTERN_C_END 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2345 /*@C 2346 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2347 (block compressed row). For good matrix assembly performance 2348 the user should preallocate the matrix storage by setting the parameters 2349 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2350 performance can be increased by more than a factor of 50. 2351 2352 Collective on Mat 2353 2354 Input Parameters: 2355 + A - the matrix 2356 . bs - size of blockk 2357 . d_nz - number of block nonzeros per block row in diagonal portion of local 2358 submatrix (same for all local rows) 2359 . d_nnz - array containing the number of block nonzeros in the various block rows 2360 of the in diagonal portion of the local (possibly different for each block 2361 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2362 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2363 submatrix (same for all local rows). 2364 - o_nnz - array containing the number of nonzeros in the various block rows of the 2365 off-diagonal portion of the local submatrix (possibly different for 2366 each block row) or PETSC_NULL. 2367 2368 Output Parameter: 2369 2370 2371 Options Database Keys: 2372 . -mat_no_unroll - uses code that does not unroll the loops in the 2373 block calculations (much slower) 2374 . -mat_block_size - size of the blocks to use 2375 2376 Notes: 2377 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2378 than it must be used on all processors that share the object for that argument. 2379 2380 Storage Information: 2381 For a square global matrix we define each processor's diagonal portion 2382 to be its local rows and the corresponding columns (a square submatrix); 2383 each processor's off-diagonal portion encompasses the remainder of the 2384 local matrix (a rectangular submatrix). 2385 2386 The user can specify preallocated storage for the diagonal part of 2387 the local submatrix with either d_nz or d_nnz (not both). Set 2388 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2389 memory allocation. Likewise, specify preallocated storage for the 2390 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2391 2392 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2393 the figure below we depict these three local rows and all columns (0-11). 2394 2395 .vb 2396 0 1 2 3 4 5 6 7 8 9 10 11 2397 ------------------- 2398 row 3 | o o o d d d o o o o o o 2399 row 4 | o o o d d d o o o o o o 2400 row 5 | o o o d d d o o o o o o 2401 ------------------- 2402 .ve 2403 2404 Thus, any entries in the d locations are stored in the d (diagonal) 2405 submatrix, and any entries in the o locations are stored in the 2406 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2407 stored simply in the MATSEQBAIJ format for compressed row storage. 2408 2409 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2410 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2411 In general, for PDE problems in which most nonzeros are near the diagonal, 2412 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2413 or you will get TERRIBLE performance; see the users' manual chapter on 2414 matrices. 2415 2416 Level: intermediate 2417 2418 .keywords: matrix, block, aij, compressed row, sparse, parallel 2419 2420 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2421 @*/ 2422 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2423 { 2424 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2425 2426 PetscFunctionBegin; 2427 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2428 if (f) { 2429 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2430 } 2431 PetscFunctionReturn(0); 2432 } 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "MatCreateMPIBAIJ" 2436 /*@C 2437 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2438 (block compressed row). For good matrix assembly performance 2439 the user should preallocate the matrix storage by setting the parameters 2440 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2441 performance can be increased by more than a factor of 50. 2442 2443 Collective on MPI_Comm 2444 2445 Input Parameters: 2446 + comm - MPI communicator 2447 . bs - size of blockk 2448 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2449 This value should be the same as the local size used in creating the 2450 y vector for the matrix-vector product y = Ax. 2451 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2452 This value should be the same as the local size used in creating the 2453 x vector for the matrix-vector product y = Ax. 2454 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2455 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2456 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2457 submatrix (same for all local rows) 2458 . d_nnz - array containing the number of nonzero blocks in the various block rows 2459 of the in diagonal portion of the local (possibly different for each block 2460 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2461 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2462 submatrix (same for all local rows). 2463 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2464 off-diagonal portion of the local submatrix (possibly different for 2465 each block row) or PETSC_NULL. 2466 2467 Output Parameter: 2468 . A - the matrix 2469 2470 Options Database Keys: 2471 . -mat_no_unroll - uses code that does not unroll the loops in the 2472 block calculations (much slower) 2473 . -mat_block_size - size of the blocks to use 2474 2475 Notes: 2476 A nonzero block is any block that as 1 or more nonzeros in it 2477 2478 The user MUST specify either the local or global matrix dimensions 2479 (possibly both). 2480 2481 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2482 than it must be used on all processors that share the object for that argument. 2483 2484 Storage Information: 2485 For a square global matrix we define each processor's diagonal portion 2486 to be its local rows and the corresponding columns (a square submatrix); 2487 each processor's off-diagonal portion encompasses the remainder of the 2488 local matrix (a rectangular submatrix). 2489 2490 The user can specify preallocated storage for the diagonal part of 2491 the local submatrix with either d_nz or d_nnz (not both). Set 2492 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2493 memory allocation. Likewise, specify preallocated storage for the 2494 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2495 2496 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2497 the figure below we depict these three local rows and all columns (0-11). 2498 2499 .vb 2500 0 1 2 3 4 5 6 7 8 9 10 11 2501 ------------------- 2502 row 3 | o o o d d d o o o o o o 2503 row 4 | o o o d d d o o o o o o 2504 row 5 | o o o d d d o o o o o o 2505 ------------------- 2506 .ve 2507 2508 Thus, any entries in the d locations are stored in the d (diagonal) 2509 submatrix, and any entries in the o locations are stored in the 2510 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2511 stored simply in the MATSEQBAIJ format for compressed row storage. 2512 2513 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2514 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2515 In general, for PDE problems in which most nonzeros are near the diagonal, 2516 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2517 or you will get TERRIBLE performance; see the users' manual chapter on 2518 matrices. 2519 2520 Level: intermediate 2521 2522 .keywords: matrix, block, aij, compressed row, sparse, parallel 2523 2524 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2525 @*/ 2526 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2527 { 2528 PetscErrorCode ierr; 2529 PetscMPIInt size; 2530 2531 PetscFunctionBegin; 2532 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2533 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2534 if (size > 1) { 2535 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2536 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2537 } else { 2538 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2539 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2540 } 2541 PetscFunctionReturn(0); 2542 } 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2546 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2547 { 2548 Mat mat; 2549 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2550 PetscErrorCode ierr; 2551 PetscInt len=0; 2552 2553 PetscFunctionBegin; 2554 *newmat = 0; 2555 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2556 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2557 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2558 2559 mat->factor = matin->factor; 2560 mat->preallocated = PETSC_TRUE; 2561 mat->assembled = PETSC_TRUE; 2562 mat->insertmode = NOT_SET_VALUES; 2563 2564 a = (Mat_MPIBAIJ*)mat->data; 2565 mat->bs = matin->bs; 2566 a->bs2 = oldmat->bs2; 2567 a->mbs = oldmat->mbs; 2568 a->nbs = oldmat->nbs; 2569 a->Mbs = oldmat->Mbs; 2570 a->Nbs = oldmat->Nbs; 2571 2572 a->rstart = oldmat->rstart; 2573 a->rend = oldmat->rend; 2574 a->cstart = oldmat->cstart; 2575 a->cend = oldmat->cend; 2576 a->size = oldmat->size; 2577 a->rank = oldmat->rank; 2578 a->donotstash = oldmat->donotstash; 2579 a->roworiented = oldmat->roworiented; 2580 a->rowindices = 0; 2581 a->rowvalues = 0; 2582 a->getrowactive = PETSC_FALSE; 2583 a->barray = 0; 2584 a->rstart_bs = oldmat->rstart_bs; 2585 a->rend_bs = oldmat->rend_bs; 2586 a->cstart_bs = oldmat->cstart_bs; 2587 a->cend_bs = oldmat->cend_bs; 2588 2589 /* hash table stuff */ 2590 a->ht = 0; 2591 a->hd = 0; 2592 a->ht_size = 0; 2593 a->ht_flag = oldmat->ht_flag; 2594 a->ht_fact = oldmat->ht_fact; 2595 a->ht_total_ct = 0; 2596 a->ht_insert_ct = 0; 2597 2598 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2599 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2600 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2601 if (oldmat->colmap) { 2602 #if defined (PETSC_USE_CTABLE) 2603 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2604 #else 2605 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2606 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt)); 2607 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2608 #endif 2609 } else a->colmap = 0; 2610 2611 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2612 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2613 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 2614 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2615 } else a->garray = 0; 2616 2617 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2618 PetscLogObjectParent(mat,a->lvec); 2619 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2620 PetscLogObjectParent(mat,a->Mvctx); 2621 2622 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2623 PetscLogObjectParent(mat,a->A); 2624 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2625 PetscLogObjectParent(mat,a->B); 2626 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2627 *newmat = mat; 2628 2629 PetscFunctionReturn(0); 2630 } 2631 2632 #include "petscsys.h" 2633 2634 #undef __FUNCT__ 2635 #define __FUNCT__ "MatLoad_MPIBAIJ" 2636 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2637 { 2638 Mat A; 2639 PetscErrorCode ierr; 2640 int fd; 2641 PetscInt i,nz,j,rstart,rend; 2642 PetscScalar *vals,*buf; 2643 MPI_Comm comm = ((PetscObject)viewer)->comm; 2644 MPI_Status status; 2645 PetscMPIInt rank,size,maxnz; 2646 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2647 PetscInt *locrowlens,*procsnz = 0,*browners; 2648 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows; 2649 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2650 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2651 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2652 2653 PetscFunctionBegin; 2654 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2655 2656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2658 if (!rank) { 2659 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2660 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2661 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2662 } 2663 2664 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2665 M = header[1]; N = header[2]; 2666 2667 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2668 2669 /* 2670 This code adds extra rows to make sure the number of rows is 2671 divisible by the blocksize 2672 */ 2673 Mbs = M/bs; 2674 extra_rows = bs - M + bs*Mbs; 2675 if (extra_rows == bs) extra_rows = 0; 2676 else Mbs++; 2677 if (extra_rows && !rank) { 2678 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2679 } 2680 2681 /* determine ownership of all rows */ 2682 mbs = Mbs/size + ((Mbs % size) > rank); 2683 m = mbs*bs; 2684 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2685 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2686 rowners[0] = 0; 2687 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2688 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2689 rstart = rowners[rank]; 2690 rend = rowners[rank+1]; 2691 2692 /* distribute row lengths to all processors */ 2693 ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2694 if (!rank) { 2695 mend = m; 2696 if (size == 1) mend = mend - extra_rows; 2697 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2698 for (j=mend; j<m; j++) locrowlens[j] = 1; 2699 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2700 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2701 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2702 for (j=0; j<m; j++) { 2703 procsnz[0] += locrowlens[j]; 2704 } 2705 for (i=1; i<size; i++) { 2706 mend = browners[i+1] - browners[i]; 2707 if (i == size-1) mend = mend - extra_rows; 2708 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2709 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2710 /* calculate the number of nonzeros on each processor */ 2711 for (j=0; j<browners[i+1]-browners[i]; j++) { 2712 procsnz[i] += rowlengths[j]; 2713 } 2714 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2715 } 2716 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2717 } else { 2718 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2719 } 2720 2721 if (!rank) { 2722 /* determine max buffer needed and allocate it */ 2723 maxnz = 0; 2724 for (i=0; i<size; i++) { 2725 maxnz = PetscMax(maxnz,procsnz[i]); 2726 } 2727 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2728 2729 /* read in my part of the matrix column indices */ 2730 nz = procsnz[0]; 2731 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2732 mycols = ibuf; 2733 if (size == 1) nz -= extra_rows; 2734 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2735 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2736 2737 /* read in every ones (except the last) and ship off */ 2738 for (i=1; i<size-1; i++) { 2739 nz = procsnz[i]; 2740 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2741 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2742 } 2743 /* read in the stuff for the last proc */ 2744 if (size != 1) { 2745 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2746 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2747 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2748 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2749 } 2750 ierr = PetscFree(cols);CHKERRQ(ierr); 2751 } else { 2752 /* determine buffer space needed for message */ 2753 nz = 0; 2754 for (i=0; i<m; i++) { 2755 nz += locrowlens[i]; 2756 } 2757 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2758 mycols = ibuf; 2759 /* receive message of column indices*/ 2760 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2761 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2762 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2763 } 2764 2765 /* loop over local rows, determining number of off diagonal entries */ 2766 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2767 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2768 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2769 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2770 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2771 rowcount = 0; nzcount = 0; 2772 for (i=0; i<mbs; i++) { 2773 dcount = 0; 2774 odcount = 0; 2775 for (j=0; j<bs; j++) { 2776 kmax = locrowlens[rowcount]; 2777 for (k=0; k<kmax; k++) { 2778 tmp = mycols[nzcount++]/bs; 2779 if (!mask[tmp]) { 2780 mask[tmp] = 1; 2781 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2782 else masked1[dcount++] = tmp; 2783 } 2784 } 2785 rowcount++; 2786 } 2787 2788 dlens[i] = dcount; 2789 odlens[i] = odcount; 2790 2791 /* zero out the mask elements we set */ 2792 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2793 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2794 } 2795 2796 /* create our matrix */ 2797 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2798 ierr = MatSetType(A,type);CHKERRQ(ierr) 2799 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2800 2801 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2802 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2803 2804 if (!rank) { 2805 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2806 /* read in my part of the matrix numerical values */ 2807 nz = procsnz[0]; 2808 vals = buf; 2809 mycols = ibuf; 2810 if (size == 1) nz -= extra_rows; 2811 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2812 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2813 2814 /* insert into matrix */ 2815 jj = rstart*bs; 2816 for (i=0; i<m; i++) { 2817 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2818 mycols += locrowlens[i]; 2819 vals += locrowlens[i]; 2820 jj++; 2821 } 2822 /* read in other processors (except the last one) and ship out */ 2823 for (i=1; i<size-1; i++) { 2824 nz = procsnz[i]; 2825 vals = buf; 2826 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2827 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2828 } 2829 /* the last proc */ 2830 if (size != 1){ 2831 nz = procsnz[i] - extra_rows; 2832 vals = buf; 2833 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2834 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2835 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2836 } 2837 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2838 } else { 2839 /* receive numeric values */ 2840 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2841 2842 /* receive message of values*/ 2843 vals = buf; 2844 mycols = ibuf; 2845 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2846 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2847 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2848 2849 /* insert into matrix */ 2850 jj = rstart*bs; 2851 for (i=0; i<m; i++) { 2852 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2853 mycols += locrowlens[i]; 2854 vals += locrowlens[i]; 2855 jj++; 2856 } 2857 } 2858 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2859 ierr = PetscFree(buf);CHKERRQ(ierr); 2860 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2861 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2862 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2863 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2864 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2865 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2866 2867 *newmat = A; 2868 PetscFunctionReturn(0); 2869 } 2870 2871 #undef __FUNCT__ 2872 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2873 /*@ 2874 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2875 2876 Input Parameters: 2877 . mat - the matrix 2878 . fact - factor 2879 2880 Collective on Mat 2881 2882 Level: advanced 2883 2884 Notes: 2885 This can also be set by the command line option: -mat_use_hash_table fact 2886 2887 .keywords: matrix, hashtable, factor, HT 2888 2889 .seealso: MatSetOption() 2890 @*/ 2891 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2892 { 2893 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2894 2895 PetscFunctionBegin; 2896 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2897 if (f) { 2898 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2899 } 2900 PetscFunctionReturn(0); 2901 } 2902 2903 #undef __FUNCT__ 2904 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2905 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2906 { 2907 Mat_MPIBAIJ *baij; 2908 2909 PetscFunctionBegin; 2910 baij = (Mat_MPIBAIJ*)mat->data; 2911 baij->ht_fact = fact; 2912 PetscFunctionReturn(0); 2913 } 2914 2915 #undef __FUNCT__ 2916 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2917 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2918 { 2919 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2920 PetscFunctionBegin; 2921 *Ad = a->A; 2922 *Ao = a->B; 2923 *colmap = a->garray; 2924 PetscFunctionReturn(0); 2925 } 2926