1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 16 17 /* UGLY, ugly, ugly 18 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 19 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 20 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 21 converts the entries into single precision and then calls ..._MatScalar() to put them 22 into the single precision data structures. 23 */ 24 #if defined(PETSC_USE_MAT_SINGLE) 25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 30 #else 31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 32 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 34 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 36 #endif 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 41 { 42 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 PetscScalar *va,*vb; 46 Vec vtmp; 47 48 PetscFunctionBegin; 49 50 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 52 53 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 54 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 56 57 for (i=0; i<A->m; i++){ 58 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 59 } 60 61 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 62 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 64 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 PetscErrorCode ierr; 111 PetscInt nbs = B->nbs,i,bs=mat->bs; 112 113 PetscFunctionBegin; 114 #if defined (PETSC_USE_CTABLE) 115 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 116 for (i=0; i<nbs; i++){ 117 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 118 } 119 #else 120 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 121 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 123 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 124 #endif 125 PetscFunctionReturn(0); 126 } 127 128 #define CHUNKSIZE 10 129 130 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 131 { \ 132 \ 133 brow = row/bs; \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow]; nrow = ailen[brow]; \ 136 bcol = col/bs; \ 137 ridx = row % bs; cidx = col % bs; \ 138 low = 0; high = nrow; \ 139 while (high-low > 3) { \ 140 t = (low+high)/2; \ 141 if (rp[t] > bcol) high = t; \ 142 else low = t; \ 143 } \ 144 for (_i=low; _i<high; _i++) { \ 145 if (rp[_i] > bcol) break; \ 146 if (rp[_i] == bcol) { \ 147 bap = ap + bs2*_i + bs*cidx + ridx; \ 148 if (addv == ADD_VALUES) *bap += value; \ 149 else *bap = value; \ 150 goto a_noinsert; \ 151 } \ 152 } \ 153 if (a->nonew == 1) goto a_noinsert; \ 154 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 155 if (nrow >= rmax) { \ 156 /* there is no extra room in row, therefore enlarge */ \ 157 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 158 MatScalar *new_a; \ 159 \ 160 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 161 \ 162 /* malloc new storage space */ \ 163 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\ 164 \ 165 /* copy over old data into new slots */ \ 166 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 167 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 168 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 169 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 170 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 171 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 172 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 174 /* free up old matrix storage */ \ 175 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);\ 176 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 177 a->singlemalloc = PETSC_TRUE; \ 178 \ 179 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 180 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 181 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 182 a->maxnz += bs2*CHUNKSIZE; \ 183 a->reallocs++; \ 184 a->nz++; \ 185 } \ 186 N = nrow++ - 1; \ 187 /* shift up all the later entries in this row */ \ 188 for (ii=N; ii>=_i; ii--) { \ 189 rp[ii+1] = rp[ii]; \ 190 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 191 } \ 192 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 193 rp[_i] = bcol; \ 194 ap[bs2*_i + bs*cidx + ridx] = value; \ 195 a_noinsert:; \ 196 ailen[brow] = nrow; \ 197 } 198 199 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 200 { \ 201 brow = row/bs; \ 202 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 203 rmax = bimax[brow]; nrow = bilen[brow]; \ 204 bcol = col/bs; \ 205 ridx = row % bs; cidx = col % bs; \ 206 low = 0; high = nrow; \ 207 while (high-low > 3) { \ 208 t = (low+high)/2; \ 209 if (rp[t] > bcol) high = t; \ 210 else low = t; \ 211 } \ 212 for (_i=low; _i<high; _i++) { \ 213 if (rp[_i] > bcol) break; \ 214 if (rp[_i] == bcol) { \ 215 bap = ap + bs2*_i + bs*cidx + ridx; \ 216 if (addv == ADD_VALUES) *bap += value; \ 217 else *bap = value; \ 218 goto b_noinsert; \ 219 } \ 220 } \ 221 if (b->nonew == 1) goto b_noinsert; \ 222 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 223 if (nrow >= rmax) { \ 224 /* there is no extra room in row, therefore enlarge */ \ 225 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 226 MatScalar *new_a; \ 227 \ 228 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 229 \ 230 /* malloc new storage space */ \ 231 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,b->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\ 232 \ 233 /* copy over old data into new slots */ \ 234 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 235 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 236 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 237 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 238 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 239 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 240 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 241 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 242 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 243 /* free up old matrix storage */ \ 244 ierr = MatSeqXAIJFreeAIJ(b->singlemalloc,b->a,b->j,b->i);CHKERRQ(ierr);\ 245 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 246 b->singlemalloc = PETSC_TRUE; \ 247 \ 248 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 249 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 250 ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 251 b->maxnz += bs2*CHUNKSIZE; \ 252 b->reallocs++; \ 253 b->nz++; \ 254 } \ 255 N = nrow++ - 1; \ 256 /* shift up all the later entries in this row */ \ 257 for (ii=N; ii>=_i; ii--) { \ 258 rp[ii+1] = rp[ii]; \ 259 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 260 } \ 261 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 262 rp[_i] = bcol; \ 263 ap[bs2*_i + bs*cidx + ridx] = value; \ 264 b_noinsert:; \ 265 bilen[brow] = nrow; \ 266 } 267 268 #if defined(PETSC_USE_MAT_SINGLE) 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatSetValues_MPIBAIJ" 271 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 272 { 273 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 274 PetscErrorCode ierr; 275 PetscInt i,N = m*n; 276 MatScalar *vsingle; 277 278 PetscFunctionBegin; 279 if (N > b->setvalueslen) { 280 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 281 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 282 b->setvalueslen = N; 283 } 284 vsingle = b->setvaluescopy; 285 286 for (i=0; i<N; i++) { 287 vsingle[i] = v[i]; 288 } 289 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 295 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 296 { 297 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 298 PetscErrorCode ierr; 299 PetscInt i,N = m*n*b->bs2; 300 MatScalar *vsingle; 301 302 PetscFunctionBegin; 303 if (N > b->setvalueslen) { 304 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 305 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 306 b->setvalueslen = N; 307 } 308 vsingle = b->setvaluescopy; 309 for (i=0; i<N; i++) { 310 vsingle[i] = v[i]; 311 } 312 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNCT__ 317 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 318 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 319 { 320 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 321 PetscErrorCode ierr; 322 PetscInt i,N = m*n; 323 MatScalar *vsingle; 324 325 PetscFunctionBegin; 326 if (N > b->setvalueslen) { 327 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 328 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 329 b->setvalueslen = N; 330 } 331 vsingle = b->setvaluescopy; 332 for (i=0; i<N; i++) { 333 vsingle[i] = v[i]; 334 } 335 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 341 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 342 { 343 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 344 PetscErrorCode ierr; 345 PetscInt i,N = m*n*b->bs2; 346 MatScalar *vsingle; 347 348 PetscFunctionBegin; 349 if (N > b->setvalueslen) { 350 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 351 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 352 b->setvalueslen = N; 353 } 354 vsingle = b->setvaluescopy; 355 for (i=0; i<N; i++) { 356 vsingle[i] = v[i]; 357 } 358 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 #endif 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 365 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 366 { 367 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 368 MatScalar value; 369 PetscTruth roworiented = baij->roworiented; 370 PetscErrorCode ierr; 371 PetscInt i,j,row,col; 372 PetscInt rstart_orig=baij->rstart_bs; 373 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 374 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 375 376 /* Some Variables required in the macro */ 377 Mat A = baij->A; 378 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 379 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 380 MatScalar *aa=a->a; 381 382 Mat B = baij->B; 383 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 384 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 385 MatScalar *ba=b->a; 386 387 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 388 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 389 MatScalar *ap,*bap; 390 391 PetscFunctionBegin; 392 for (i=0; i<m; i++) { 393 if (im[i] < 0) continue; 394 #if defined(PETSC_USE_DEBUG) 395 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 396 #endif 397 if (im[i] >= rstart_orig && im[i] < rend_orig) { 398 row = im[i] - rstart_orig; 399 for (j=0; j<n; j++) { 400 if (in[j] >= cstart_orig && in[j] < cend_orig){ 401 col = in[j] - cstart_orig; 402 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 403 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 404 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 405 } else if (in[j] < 0) continue; 406 #if defined(PETSC_USE_DEBUG) 407 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 408 #endif 409 else { 410 if (mat->was_assembled) { 411 if (!baij->colmap) { 412 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 413 } 414 #if defined (PETSC_USE_CTABLE) 415 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 416 col = col - 1; 417 #else 418 col = baij->colmap[in[j]/bs] - 1; 419 #endif 420 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 421 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 422 col = in[j]; 423 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 424 B = baij->B; 425 b = (Mat_SeqBAIJ*)(B)->data; 426 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 427 ba=b->a; 428 } else col += in[j]%bs; 429 } else col = in[j]; 430 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 431 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 432 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 433 } 434 } 435 } else { 436 if (!baij->donotstash) { 437 if (roworiented) { 438 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 439 } else { 440 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 441 } 442 } 443 } 444 } 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 450 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 451 { 452 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 453 const MatScalar *value; 454 MatScalar *barray=baij->barray; 455 PetscTruth roworiented = baij->roworiented; 456 PetscErrorCode ierr; 457 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 458 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 459 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 460 461 PetscFunctionBegin; 462 if(!barray) { 463 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 464 baij->barray = barray; 465 } 466 467 if (roworiented) { 468 stepval = (n-1)*bs; 469 } else { 470 stepval = (m-1)*bs; 471 } 472 for (i=0; i<m; i++) { 473 if (im[i] < 0) continue; 474 #if defined(PETSC_USE_DEBUG) 475 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 476 #endif 477 if (im[i] >= rstart && im[i] < rend) { 478 row = im[i] - rstart; 479 for (j=0; j<n; j++) { 480 /* If NumCol = 1 then a copy is not required */ 481 if ((roworiented) && (n == 1)) { 482 barray = (MatScalar*)v + i*bs2; 483 } else if((!roworiented) && (m == 1)) { 484 barray = (MatScalar*)v + j*bs2; 485 } else { /* Here a copy is required */ 486 if (roworiented) { 487 value = v + i*(stepval+bs)*bs + j*bs; 488 } else { 489 value = v + j*(stepval+bs)*bs + i*bs; 490 } 491 for (ii=0; ii<bs; ii++,value+=stepval) { 492 for (jj=0; jj<bs; jj++) { 493 *barray++ = *value++; 494 } 495 } 496 barray -=bs2; 497 } 498 499 if (in[j] >= cstart && in[j] < cend){ 500 col = in[j] - cstart; 501 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 502 } 503 else if (in[j] < 0) continue; 504 #if defined(PETSC_USE_DEBUG) 505 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 506 #endif 507 else { 508 if (mat->was_assembled) { 509 if (!baij->colmap) { 510 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 511 } 512 513 #if defined(PETSC_USE_DEBUG) 514 #if defined (PETSC_USE_CTABLE) 515 { PetscInt data; 516 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 517 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 518 } 519 #else 520 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 521 #endif 522 #endif 523 #if defined (PETSC_USE_CTABLE) 524 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 525 col = (col - 1)/bs; 526 #else 527 col = (baij->colmap[in[j]] - 1)/bs; 528 #endif 529 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 530 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 531 col = in[j]; 532 } 533 } 534 else col = in[j]; 535 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 536 } 537 } 538 } else { 539 if (!baij->donotstash) { 540 if (roworiented) { 541 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 542 } else { 543 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 544 } 545 } 546 } 547 } 548 PetscFunctionReturn(0); 549 } 550 551 #define HASH_KEY 0.6180339887 552 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 553 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 554 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 557 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 560 PetscTruth roworiented = baij->roworiented; 561 PetscErrorCode ierr; 562 PetscInt i,j,row,col; 563 PetscInt rstart_orig=baij->rstart_bs; 564 PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 565 PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 566 PetscReal tmp; 567 MatScalar **HD = baij->hd,value; 568 #if defined(PETSC_USE_DEBUG) 569 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 570 #endif 571 572 PetscFunctionBegin; 573 574 for (i=0; i<m; i++) { 575 #if defined(PETSC_USE_DEBUG) 576 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 577 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 578 #endif 579 row = im[i]; 580 if (row >= rstart_orig && row < rend_orig) { 581 for (j=0; j<n; j++) { 582 col = in[j]; 583 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 584 /* Look up PetscInto the Hash Table */ 585 key = (row/bs)*Nbs+(col/bs)+1; 586 h1 = HASH(size,key,tmp); 587 588 589 idx = h1; 590 #if defined(PETSC_USE_DEBUG) 591 insert_ct++; 592 total_ct++; 593 if (HT[idx] != key) { 594 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 595 if (idx == size) { 596 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 597 if (idx == h1) { 598 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 599 } 600 } 601 } 602 #else 603 if (HT[idx] != key) { 604 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 605 if (idx == size) { 606 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 607 if (idx == h1) { 608 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 609 } 610 } 611 } 612 #endif 613 /* A HASH table entry is found, so insert the values at the correct address */ 614 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 615 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 616 } 617 } else { 618 if (!baij->donotstash) { 619 if (roworiented) { 620 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 621 } else { 622 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 623 } 624 } 625 } 626 } 627 #if defined(PETSC_USE_DEBUG) 628 baij->ht_total_ct = total_ct; 629 baij->ht_insert_ct = insert_ct; 630 #endif 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 636 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 637 { 638 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 639 PetscTruth roworiented = baij->roworiented; 640 PetscErrorCode ierr; 641 PetscInt i,j,ii,jj,row,col; 642 PetscInt rstart=baij->rstart ; 643 PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 644 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 645 PetscReal tmp; 646 MatScalar **HD = baij->hd,*baij_a; 647 const MatScalar *v_t,*value; 648 #if defined(PETSC_USE_DEBUG) 649 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 650 #endif 651 652 PetscFunctionBegin; 653 654 if (roworiented) { 655 stepval = (n-1)*bs; 656 } else { 657 stepval = (m-1)*bs; 658 } 659 for (i=0; i<m; i++) { 660 #if defined(PETSC_USE_DEBUG) 661 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 662 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 663 #endif 664 row = im[i]; 665 v_t = v + i*bs2; 666 if (row >= rstart && row < rend) { 667 for (j=0; j<n; j++) { 668 col = in[j]; 669 670 /* Look up into the Hash Table */ 671 key = row*Nbs+col+1; 672 h1 = HASH(size,key,tmp); 673 674 idx = h1; 675 #if defined(PETSC_USE_DEBUG) 676 total_ct++; 677 insert_ct++; 678 if (HT[idx] != key) { 679 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 680 if (idx == size) { 681 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 682 if (idx == h1) { 683 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 684 } 685 } 686 } 687 #else 688 if (HT[idx] != key) { 689 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 690 if (idx == size) { 691 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 692 if (idx == h1) { 693 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 694 } 695 } 696 } 697 #endif 698 baij_a = HD[idx]; 699 if (roworiented) { 700 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 701 /* value = v + (i*(stepval+bs)+j)*bs; */ 702 value = v_t; 703 v_t += bs; 704 if (addv == ADD_VALUES) { 705 for (ii=0; ii<bs; ii++,value+=stepval) { 706 for (jj=ii; jj<bs2; jj+=bs) { 707 baij_a[jj] += *value++; 708 } 709 } 710 } else { 711 for (ii=0; ii<bs; ii++,value+=stepval) { 712 for (jj=ii; jj<bs2; jj+=bs) { 713 baij_a[jj] = *value++; 714 } 715 } 716 } 717 } else { 718 value = v + j*(stepval+bs)*bs + i*bs; 719 if (addv == ADD_VALUES) { 720 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 721 for (jj=0; jj<bs; jj++) { 722 baij_a[jj] += *value++; 723 } 724 } 725 } else { 726 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 727 for (jj=0; jj<bs; jj++) { 728 baij_a[jj] = *value++; 729 } 730 } 731 } 732 } 733 } 734 } else { 735 if (!baij->donotstash) { 736 if (roworiented) { 737 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 738 } else { 739 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 740 } 741 } 742 } 743 } 744 #if defined(PETSC_USE_DEBUG) 745 baij->ht_total_ct = total_ct; 746 baij->ht_insert_ct = insert_ct; 747 #endif 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatGetValues_MPIBAIJ" 753 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 754 { 755 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 756 PetscErrorCode ierr; 757 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 758 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 759 760 PetscFunctionBegin; 761 for (i=0; i<m; i++) { 762 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 763 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 764 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 765 row = idxm[i] - bsrstart; 766 for (j=0; j<n; j++) { 767 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 768 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 769 if (idxn[j] >= bscstart && idxn[j] < bscend){ 770 col = idxn[j] - bscstart; 771 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 772 } else { 773 if (!baij->colmap) { 774 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 775 } 776 #if defined (PETSC_USE_CTABLE) 777 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 778 data --; 779 #else 780 data = baij->colmap[idxn[j]/bs]-1; 781 #endif 782 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 783 else { 784 col = data + idxn[j]%bs; 785 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786 } 787 } 788 } 789 } else { 790 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 791 } 792 } 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatNorm_MPIBAIJ" 798 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 799 { 800 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 801 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 802 PetscErrorCode ierr; 803 PetscInt i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col; 804 PetscReal sum = 0.0; 805 MatScalar *v; 806 807 PetscFunctionBegin; 808 if (baij->size == 1) { 809 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 810 } else { 811 if (type == NORM_FROBENIUS) { 812 v = amat->a; 813 nz = amat->nz*bs2; 814 for (i=0; i<nz; i++) { 815 #if defined(PETSC_USE_COMPLEX) 816 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 817 #else 818 sum += (*v)*(*v); v++; 819 #endif 820 } 821 v = bmat->a; 822 nz = bmat->nz*bs2; 823 for (i=0; i<nz; i++) { 824 #if defined(PETSC_USE_COMPLEX) 825 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 826 #else 827 sum += (*v)*(*v); v++; 828 #endif 829 } 830 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 831 *nrm = sqrt(*nrm); 832 } else if (type == NORM_1) { /* max column sum */ 833 PetscReal *tmp,*tmp2; 834 PetscInt *jj,*garray=baij->garray,cstart=baij->cstart; 835 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 836 tmp2 = tmp + mat->N; 837 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 838 v = amat->a; jj = amat->j; 839 for (i=0; i<amat->nz; i++) { 840 for (j=0; j<bs; j++){ 841 col = bs*(cstart + *jj) + j; /* column index */ 842 for (row=0; row<bs; row++){ 843 tmp[col] += PetscAbsScalar(*v); v++; 844 } 845 } 846 jj++; 847 } 848 v = bmat->a; jj = bmat->j; 849 for (i=0; i<bmat->nz; i++) { 850 for (j=0; j<bs; j++){ 851 col = bs*garray[*jj] + j; 852 for (row=0; row<bs; row++){ 853 tmp[col] += PetscAbsScalar(*v); v++; 854 } 855 } 856 jj++; 857 } 858 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 859 *nrm = 0.0; 860 for (j=0; j<mat->N; j++) { 861 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 862 } 863 ierr = PetscFree(tmp);CHKERRQ(ierr); 864 } else if (type == NORM_INFINITY) { /* max row sum */ 865 PetscReal *sums; 866 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 867 sum = 0.0; 868 for (j=0; j<amat->mbs; j++) { 869 for (row=0; row<bs; row++) sums[row] = 0.0; 870 v = amat->a + bs2*amat->i[j]; 871 nz = amat->i[j+1]-amat->i[j]; 872 for (i=0; i<nz; i++) { 873 for (col=0; col<bs; col++){ 874 for (row=0; row<bs; row++){ 875 sums[row] += PetscAbsScalar(*v); v++; 876 } 877 } 878 } 879 v = bmat->a + bs2*bmat->i[j]; 880 nz = bmat->i[j+1]-bmat->i[j]; 881 for (i=0; i<nz; i++) { 882 for (col=0; col<bs; col++){ 883 for (row=0; row<bs; row++){ 884 sums[row] += PetscAbsScalar(*v); v++; 885 } 886 } 887 } 888 for (row=0; row<bs; row++){ 889 if (sums[row] > sum) sum = sums[row]; 890 } 891 } 892 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 893 ierr = PetscFree(sums);CHKERRQ(ierr); 894 } else { 895 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 896 } 897 } 898 PetscFunctionReturn(0); 899 } 900 901 /* 902 Creates the hash table, and sets the table 903 This table is created only once. 904 If new entried need to be added to the matrix 905 then the hash table has to be destroyed and 906 recreated. 907 */ 908 #undef __FUNCT__ 909 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 910 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 911 { 912 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 913 Mat A = baij->A,B=baij->B; 914 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 915 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 916 PetscErrorCode ierr; 917 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 918 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 919 PetscInt *HT,key; 920 MatScalar **HD; 921 PetscReal tmp; 922 #if defined(PETSC_USE_DEBUG) 923 PetscInt ct=0,max=0; 924 #endif 925 926 PetscFunctionBegin; 927 baij->ht_size=(PetscInt)(factor*nz); 928 size = baij->ht_size; 929 930 if (baij->ht) { 931 PetscFunctionReturn(0); 932 } 933 934 /* Allocate Memory for Hash Table */ 935 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 936 baij->ht = (PetscInt*)(baij->hd + size); 937 HD = baij->hd; 938 HT = baij->ht; 939 940 941 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 942 943 944 /* Loop Over A */ 945 for (i=0; i<a->mbs; i++) { 946 for (j=ai[i]; j<ai[i+1]; j++) { 947 row = i+rstart; 948 col = aj[j]+cstart; 949 950 key = row*Nbs + col + 1; 951 h1 = HASH(size,key,tmp); 952 for (k=0; k<size; k++){ 953 if (!HT[(h1+k)%size]) { 954 HT[(h1+k)%size] = key; 955 HD[(h1+k)%size] = a->a + j*bs2; 956 break; 957 #if defined(PETSC_USE_DEBUG) 958 } else { 959 ct++; 960 #endif 961 } 962 } 963 #if defined(PETSC_USE_DEBUG) 964 if (k> max) max = k; 965 #endif 966 } 967 } 968 /* Loop Over B */ 969 for (i=0; i<b->mbs; i++) { 970 for (j=bi[i]; j<bi[i+1]; j++) { 971 row = i+rstart; 972 col = garray[bj[j]]; 973 key = row*Nbs + col + 1; 974 h1 = HASH(size,key,tmp); 975 for (k=0; k<size; k++){ 976 if (!HT[(h1+k)%size]) { 977 HT[(h1+k)%size] = key; 978 HD[(h1+k)%size] = b->a + j*bs2; 979 break; 980 #if defined(PETSC_USE_DEBUG) 981 } else { 982 ct++; 983 #endif 984 } 985 } 986 #if defined(PETSC_USE_DEBUG) 987 if (k> max) max = k; 988 #endif 989 } 990 } 991 992 /* Print Summary */ 993 #if defined(PETSC_USE_DEBUG) 994 for (i=0,j=0; i<size; i++) { 995 if (HT[i]) {j++;} 996 } 997 ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr); 998 #endif 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 1004 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 1005 { 1006 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1007 PetscErrorCode ierr; 1008 PetscInt nstash,reallocs; 1009 InsertMode addv; 1010 1011 PetscFunctionBegin; 1012 if (baij->donotstash) { 1013 PetscFunctionReturn(0); 1014 } 1015 1016 /* make sure all processors are either in INSERTMODE or ADDMODE */ 1017 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 1018 if (addv == (ADD_VALUES|INSERT_VALUES)) { 1019 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1020 } 1021 mat->insertmode = addv; /* in case this processor had no cache */ 1022 1023 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 1024 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 1025 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 1026 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 1027 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 1028 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } 1031 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 1034 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 1035 { 1036 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 1037 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 1038 PetscErrorCode ierr; 1039 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 1040 PetscInt *row,*col,other_disassembled; 1041 PetscTruth r1,r2,r3; 1042 MatScalar *val; 1043 InsertMode addv = mat->insertmode; 1044 PetscMPIInt n; 1045 1046 PetscFunctionBegin; 1047 if (!baij->donotstash) { 1048 while (1) { 1049 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1050 if (!flg) break; 1051 1052 for (i=0; i<n;) { 1053 /* Now identify the consecutive vals belonging to the same row */ 1054 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1055 if (j < n) ncols = j-i; 1056 else ncols = n-i; 1057 /* Now assemble all these values with a single function call */ 1058 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1059 i = j; 1060 } 1061 } 1062 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1063 /* Now process the block-stash. Since the values are stashed column-oriented, 1064 set the roworiented flag to column oriented, and after MatSetValues() 1065 restore the original flags */ 1066 r1 = baij->roworiented; 1067 r2 = a->roworiented; 1068 r3 = b->roworiented; 1069 baij->roworiented = PETSC_FALSE; 1070 a->roworiented = PETSC_FALSE; 1071 b->roworiented = PETSC_FALSE; 1072 while (1) { 1073 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1074 if (!flg) break; 1075 1076 for (i=0; i<n;) { 1077 /* Now identify the consecutive vals belonging to the same row */ 1078 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1079 if (j < n) ncols = j-i; 1080 else ncols = n-i; 1081 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1082 i = j; 1083 } 1084 } 1085 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1086 baij->roworiented = r1; 1087 a->roworiented = r2; 1088 b->roworiented = r3; 1089 } 1090 1091 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1092 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1093 1094 /* determine if any processor has disassembled, if so we must 1095 also disassemble ourselfs, in order that we may reassemble. */ 1096 /* 1097 if nonzero structure of submatrix B cannot change then we know that 1098 no processor disassembled thus we can skip this stuff 1099 */ 1100 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1101 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1102 if (mat->was_assembled && !other_disassembled) { 1103 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1104 } 1105 } 1106 1107 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1108 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1109 } 1110 b->compressedrow.use = PETSC_TRUE; 1111 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1112 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1113 1114 #if defined(PETSC_USE_DEBUG) 1115 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1116 ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr); 1117 baij->ht_total_ct = 0; 1118 baij->ht_insert_ct = 0; 1119 } 1120 #endif 1121 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1122 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1123 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1124 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1125 } 1126 1127 if (baij->rowvalues) { 1128 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1129 baij->rowvalues = 0; 1130 } 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1136 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1137 { 1138 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1139 PetscErrorCode ierr; 1140 PetscMPIInt size = baij->size,rank = baij->rank; 1141 PetscInt bs = mat->bs; 1142 PetscTruth iascii,isdraw; 1143 PetscViewer sviewer; 1144 PetscViewerFormat format; 1145 1146 PetscFunctionBegin; 1147 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1148 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1149 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1150 if (iascii) { 1151 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1152 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1153 MatInfo info; 1154 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1155 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1156 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1157 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1158 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1159 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1161 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1162 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1163 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1164 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1167 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1170 PetscFunctionReturn(0); 1171 } 1172 } 1173 1174 if (isdraw) { 1175 PetscDraw draw; 1176 PetscTruth isnull; 1177 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1178 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1179 } 1180 1181 if (size == 1) { 1182 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1183 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1184 } else { 1185 /* assemble the entire matrix onto first processor. */ 1186 Mat A; 1187 Mat_SeqBAIJ *Aloc; 1188 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1189 MatScalar *a; 1190 1191 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1192 /* Perhaps this should be the type of mat? */ 1193 if (!rank) { 1194 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1195 } else { 1196 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1197 } 1198 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1199 ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1200 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1201 1202 /* copy over the A part */ 1203 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1204 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1205 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1206 1207 for (i=0; i<mbs; i++) { 1208 rvals[0] = bs*(baij->rstart + i); 1209 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1210 for (j=ai[i]; j<ai[i+1]; j++) { 1211 col = (baij->cstart+aj[j])*bs; 1212 for (k=0; k<bs; k++) { 1213 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1214 col++; a += bs; 1215 } 1216 } 1217 } 1218 /* copy over the B part */ 1219 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1220 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1221 for (i=0; i<mbs; i++) { 1222 rvals[0] = bs*(baij->rstart + i); 1223 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1224 for (j=ai[i]; j<ai[i+1]; j++) { 1225 col = baij->garray[aj[j]]*bs; 1226 for (k=0; k<bs; k++) { 1227 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1228 col++; a += bs; 1229 } 1230 } 1231 } 1232 ierr = PetscFree(rvals);CHKERRQ(ierr); 1233 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1234 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1235 /* 1236 Everyone has to call to draw the matrix since the graphics waits are 1237 synchronized across all processors that share the PetscDraw object 1238 */ 1239 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1240 if (!rank) { 1241 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1242 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1243 } 1244 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1245 ierr = MatDestroy(A);CHKERRQ(ierr); 1246 } 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNCT__ 1251 #define __FUNCT__ "MatView_MPIBAIJ" 1252 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1253 { 1254 PetscErrorCode ierr; 1255 PetscTruth iascii,isdraw,issocket,isbinary; 1256 1257 PetscFunctionBegin; 1258 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1259 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1260 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1261 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1262 if (iascii || isdraw || issocket || isbinary) { 1263 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1264 } else { 1265 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1266 } 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1272 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1273 { 1274 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1275 PetscErrorCode ierr; 1276 1277 PetscFunctionBegin; 1278 #if defined(PETSC_USE_LOG) 1279 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1280 #endif 1281 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1282 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1283 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1284 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1285 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1286 #if defined (PETSC_USE_CTABLE) 1287 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1288 #else 1289 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1290 #endif 1291 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1292 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1293 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1294 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1295 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1296 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1297 #if defined(PETSC_USE_MAT_SINGLE) 1298 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1299 #endif 1300 ierr = PetscFree(baij);CHKERRQ(ierr); 1301 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatMult_MPIBAIJ" 1314 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1315 { 1316 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1317 PetscErrorCode ierr; 1318 PetscInt nt; 1319 1320 PetscFunctionBegin; 1321 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1322 if (nt != A->n) { 1323 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1324 } 1325 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1326 if (nt != A->m) { 1327 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1328 } 1329 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1330 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1331 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1332 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1333 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1339 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1340 { 1341 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1346 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1347 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1348 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1354 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1355 { 1356 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1357 PetscErrorCode ierr; 1358 PetscTruth merged; 1359 1360 PetscFunctionBegin; 1361 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1362 /* do nondiagonal part */ 1363 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1364 if (!merged) { 1365 /* send it on its way */ 1366 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1367 /* do local part */ 1368 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1369 /* receive remote parts: note this assumes the values are not actually */ 1370 /* inserted in yy until the next line */ 1371 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1372 } else { 1373 /* do local part */ 1374 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1375 /* send it on its way */ 1376 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1377 /* values actually were received in the Begin() but we need to call this nop */ 1378 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1385 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1386 { 1387 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1388 PetscErrorCode ierr; 1389 1390 PetscFunctionBegin; 1391 /* do nondiagonal part */ 1392 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1393 /* send it on its way */ 1394 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1395 /* do local part */ 1396 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1397 /* receive remote parts: note this assumes the values are not actually */ 1398 /* inserted in yy until the next line, which is true for my implementation*/ 1399 /* but is not perhaps always true. */ 1400 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 /* 1405 This only works correctly for square matrices where the subblock A->A is the 1406 diagonal block 1407 */ 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1410 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1411 { 1412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1417 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatScale_MPIBAIJ" 1423 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1424 { 1425 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1426 PetscErrorCode ierr; 1427 1428 PetscFunctionBegin; 1429 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1430 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1436 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1437 { 1438 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1439 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1440 PetscErrorCode ierr; 1441 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1442 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1443 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1444 1445 PetscFunctionBegin; 1446 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1447 mat->getrowactive = PETSC_TRUE; 1448 1449 if (!mat->rowvalues && (idx || v)) { 1450 /* 1451 allocate enough space to hold information from the longest row. 1452 */ 1453 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1454 PetscInt max = 1,mbs = mat->mbs,tmp; 1455 for (i=0; i<mbs; i++) { 1456 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1457 if (max < tmp) { max = tmp; } 1458 } 1459 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1460 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1461 } 1462 1463 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1464 lrow = row - brstart; 1465 1466 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1467 if (!v) {pvA = 0; pvB = 0;} 1468 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1469 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1470 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1471 nztot = nzA + nzB; 1472 1473 cmap = mat->garray; 1474 if (v || idx) { 1475 if (nztot) { 1476 /* Sort by increasing column numbers, assuming A and B already sorted */ 1477 PetscInt imark = -1; 1478 if (v) { 1479 *v = v_p = mat->rowvalues; 1480 for (i=0; i<nzB; i++) { 1481 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1482 else break; 1483 } 1484 imark = i; 1485 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1486 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1487 } 1488 if (idx) { 1489 *idx = idx_p = mat->rowindices; 1490 if (imark > -1) { 1491 for (i=0; i<imark; i++) { 1492 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1493 } 1494 } else { 1495 for (i=0; i<nzB; i++) { 1496 if (cmap[cworkB[i]/bs] < cstart) 1497 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1498 else break; 1499 } 1500 imark = i; 1501 } 1502 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1503 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1504 } 1505 } else { 1506 if (idx) *idx = 0; 1507 if (v) *v = 0; 1508 } 1509 } 1510 *nz = nztot; 1511 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1512 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1518 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1519 { 1520 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1521 1522 PetscFunctionBegin; 1523 if (!baij->getrowactive) { 1524 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1525 } 1526 baij->getrowactive = PETSC_FALSE; 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1532 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1533 { 1534 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1539 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1545 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1546 { 1547 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1548 Mat A = a->A,B = a->B; 1549 PetscErrorCode ierr; 1550 PetscReal isend[5],irecv[5]; 1551 1552 PetscFunctionBegin; 1553 info->block_size = (PetscReal)matin->bs; 1554 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1555 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1556 isend[3] = info->memory; isend[4] = info->mallocs; 1557 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1558 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1559 isend[3] += info->memory; isend[4] += info->mallocs; 1560 if (flag == MAT_LOCAL) { 1561 info->nz_used = isend[0]; 1562 info->nz_allocated = isend[1]; 1563 info->nz_unneeded = isend[2]; 1564 info->memory = isend[3]; 1565 info->mallocs = isend[4]; 1566 } else if (flag == MAT_GLOBAL_MAX) { 1567 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1568 info->nz_used = irecv[0]; 1569 info->nz_allocated = irecv[1]; 1570 info->nz_unneeded = irecv[2]; 1571 info->memory = irecv[3]; 1572 info->mallocs = irecv[4]; 1573 } else if (flag == MAT_GLOBAL_SUM) { 1574 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1575 info->nz_used = irecv[0]; 1576 info->nz_allocated = irecv[1]; 1577 info->nz_unneeded = irecv[2]; 1578 info->memory = irecv[3]; 1579 info->mallocs = irecv[4]; 1580 } else { 1581 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1582 } 1583 info->rows_global = (PetscReal)A->M; 1584 info->columns_global = (PetscReal)A->N; 1585 info->rows_local = (PetscReal)A->m; 1586 info->columns_local = (PetscReal)A->N; 1587 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1588 info->fill_ratio_needed = 0; 1589 info->factor_mallocs = 0; 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1595 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1596 { 1597 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 switch (op) { 1602 case MAT_NO_NEW_NONZERO_LOCATIONS: 1603 case MAT_YES_NEW_NONZERO_LOCATIONS: 1604 case MAT_COLUMNS_UNSORTED: 1605 case MAT_COLUMNS_SORTED: 1606 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1607 case MAT_KEEP_ZEROED_ROWS: 1608 case MAT_NEW_NONZERO_LOCATION_ERR: 1609 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1610 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1611 break; 1612 case MAT_ROW_ORIENTED: 1613 a->roworiented = PETSC_TRUE; 1614 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1615 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1616 break; 1617 case MAT_ROWS_SORTED: 1618 case MAT_ROWS_UNSORTED: 1619 case MAT_YES_NEW_DIAGONALS: 1620 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1621 break; 1622 case MAT_COLUMN_ORIENTED: 1623 a->roworiented = PETSC_FALSE; 1624 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1625 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1626 break; 1627 case MAT_IGNORE_OFF_PROC_ENTRIES: 1628 a->donotstash = PETSC_TRUE; 1629 break; 1630 case MAT_NO_NEW_DIAGONALS: 1631 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1632 case MAT_USE_HASH_TABLE: 1633 a->ht_flag = PETSC_TRUE; 1634 break; 1635 case MAT_SYMMETRIC: 1636 case MAT_STRUCTURALLY_SYMMETRIC: 1637 case MAT_HERMITIAN: 1638 case MAT_SYMMETRY_ETERNAL: 1639 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1640 break; 1641 case MAT_NOT_SYMMETRIC: 1642 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1643 case MAT_NOT_HERMITIAN: 1644 case MAT_NOT_SYMMETRY_ETERNAL: 1645 break; 1646 default: 1647 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1648 } 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1654 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1655 { 1656 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1657 Mat_SeqBAIJ *Aloc; 1658 Mat B; 1659 PetscErrorCode ierr; 1660 PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1661 PetscInt bs=A->bs,mbs=baij->mbs; 1662 MatScalar *a; 1663 1664 PetscFunctionBegin; 1665 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1666 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1667 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1668 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1669 1670 /* copy over the A part */ 1671 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1672 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1673 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1674 1675 for (i=0; i<mbs; i++) { 1676 rvals[0] = bs*(baij->rstart + i); 1677 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1678 for (j=ai[i]; j<ai[i+1]; j++) { 1679 col = (baij->cstart+aj[j])*bs; 1680 for (k=0; k<bs; k++) { 1681 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1682 col++; a += bs; 1683 } 1684 } 1685 } 1686 /* copy over the B part */ 1687 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1688 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1689 for (i=0; i<mbs; i++) { 1690 rvals[0] = bs*(baij->rstart + i); 1691 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1692 for (j=ai[i]; j<ai[i+1]; j++) { 1693 col = baij->garray[aj[j]]*bs; 1694 for (k=0; k<bs; k++) { 1695 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1696 col++; a += bs; 1697 } 1698 } 1699 } 1700 ierr = PetscFree(rvals);CHKERRQ(ierr); 1701 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1702 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1703 1704 if (matout) { 1705 *matout = B; 1706 } else { 1707 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1708 } 1709 PetscFunctionReturn(0); 1710 } 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1714 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1715 { 1716 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1717 Mat a = baij->A,b = baij->B; 1718 PetscErrorCode ierr; 1719 PetscInt s1,s2,s3; 1720 1721 PetscFunctionBegin; 1722 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1723 if (rr) { 1724 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1725 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1726 /* Overlap communication with computation. */ 1727 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1728 } 1729 if (ll) { 1730 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1731 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1732 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1733 } 1734 /* scale the diagonal block */ 1735 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1736 1737 if (rr) { 1738 /* Do a scatter end and then right scale the off-diagonal block */ 1739 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1740 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1741 } 1742 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1748 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1749 { 1750 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1751 PetscErrorCode ierr; 1752 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1753 PetscInt i,N,*rows,*owners = l->rowners; 1754 PetscInt *nprocs,j,idx,nsends,row; 1755 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1756 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1757 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1758 MPI_Comm comm = A->comm; 1759 MPI_Request *send_waits,*recv_waits; 1760 MPI_Status recv_status,*send_status; 1761 IS istmp; 1762 #if defined(PETSC_DEBUG) 1763 PetscTruth found = PETSC_FALSE; 1764 #endif 1765 1766 PetscFunctionBegin; 1767 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1768 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1769 1770 /* first count number of contributors to each processor */ 1771 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1772 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1773 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1774 j = 0; 1775 for (i=0; i<N; i++) { 1776 if (lastidx > (idx = rows[i])) j = 0; 1777 lastidx = idx; 1778 for (; j<size; j++) { 1779 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1780 nprocs[2*j]++; 1781 nprocs[2*j+1] = 1; 1782 owner[i] = j; 1783 #if defined(PETSC_DEBUG) 1784 found = PETSC_TRUE; 1785 #endif 1786 break; 1787 } 1788 } 1789 #if defined(PETSC_DEBUG) 1790 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1791 found = PETSC_FALSE; 1792 #endif 1793 } 1794 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1795 1796 /* inform other processors of number of messages and max length*/ 1797 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1798 1799 /* post receives: */ 1800 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1801 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1802 for (i=0; i<nrecvs; i++) { 1803 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1804 } 1805 1806 /* do sends: 1807 1) starts[i] gives the starting index in svalues for stuff going to 1808 the ith processor 1809 */ 1810 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1811 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1812 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1813 starts[0] = 0; 1814 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1815 for (i=0; i<N; i++) { 1816 svalues[starts[owner[i]]++] = rows[i]; 1817 } 1818 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1819 1820 starts[0] = 0; 1821 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1822 count = 0; 1823 for (i=0; i<size; i++) { 1824 if (nprocs[2*i+1]) { 1825 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1826 } 1827 } 1828 ierr = PetscFree(starts);CHKERRQ(ierr); 1829 1830 base = owners[rank]*bs; 1831 1832 /* wait on receives */ 1833 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1834 source = lens + nrecvs; 1835 count = nrecvs; slen = 0; 1836 while (count) { 1837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1838 /* unpack receives into our local space */ 1839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1840 source[imdex] = recv_status.MPI_SOURCE; 1841 lens[imdex] = n; 1842 slen += n; 1843 count--; 1844 } 1845 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1846 1847 /* move the data into the send scatter */ 1848 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1849 count = 0; 1850 for (i=0; i<nrecvs; i++) { 1851 values = rvalues + i*nmax; 1852 for (j=0; j<lens[i]; j++) { 1853 lrows[count++] = values[j] - base; 1854 } 1855 } 1856 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1857 ierr = PetscFree(lens);CHKERRQ(ierr); 1858 ierr = PetscFree(owner);CHKERRQ(ierr); 1859 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1860 1861 /* actually zap the local rows */ 1862 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1863 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 1864 1865 /* 1866 Zero the required rows. If the "diagonal block" of the matrix 1867 is square and the user wishes to set the diagonal we use seperate 1868 code so that MatSetValues() is not called for each diagonal allocating 1869 new memory, thus calling lots of mallocs and slowing things down. 1870 1871 Contributed by: Mathew Knepley 1872 */ 1873 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1874 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1875 if (diag && (l->A->M == l->A->N)) { 1876 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1877 } else if (diag) { 1878 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1879 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1880 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1881 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1882 } 1883 for (i=0; i<slen; i++) { 1884 row = lrows[i] + rstart_bs; 1885 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1886 } 1887 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1888 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1889 } else { 1890 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1891 } 1892 1893 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1894 ierr = PetscFree(lrows);CHKERRQ(ierr); 1895 1896 /* wait on sends */ 1897 if (nsends) { 1898 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1899 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1900 ierr = PetscFree(send_status);CHKERRQ(ierr); 1901 } 1902 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1903 ierr = PetscFree(svalues);CHKERRQ(ierr); 1904 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1910 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1911 { 1912 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1913 MPI_Comm comm = A->comm; 1914 static PetscTruth called = PETSC_FALSE; 1915 PetscErrorCode ierr; 1916 1917 PetscFunctionBegin; 1918 if (!a->rank) { 1919 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1920 } 1921 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1922 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1923 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1929 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1930 { 1931 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1932 PetscErrorCode ierr; 1933 1934 PetscFunctionBegin; 1935 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatEqual_MPIBAIJ" 1943 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1944 { 1945 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1946 Mat a,b,c,d; 1947 PetscTruth flg; 1948 PetscErrorCode ierr; 1949 1950 PetscFunctionBegin; 1951 a = matA->A; b = matA->B; 1952 c = matB->A; d = matB->B; 1953 1954 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1955 if (flg) { 1956 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1957 } 1958 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1965 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1966 { 1967 PetscErrorCode ierr; 1968 1969 PetscFunctionBegin; 1970 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1971 PetscFunctionReturn(0); 1972 } 1973 1974 /* -------------------------------------------------------------------*/ 1975 static struct _MatOps MatOps_Values = { 1976 MatSetValues_MPIBAIJ, 1977 MatGetRow_MPIBAIJ, 1978 MatRestoreRow_MPIBAIJ, 1979 MatMult_MPIBAIJ, 1980 /* 4*/ MatMultAdd_MPIBAIJ, 1981 MatMultTranspose_MPIBAIJ, 1982 MatMultTransposeAdd_MPIBAIJ, 1983 0, 1984 0, 1985 0, 1986 /*10*/ 0, 1987 0, 1988 0, 1989 0, 1990 MatTranspose_MPIBAIJ, 1991 /*15*/ MatGetInfo_MPIBAIJ, 1992 MatEqual_MPIBAIJ, 1993 MatGetDiagonal_MPIBAIJ, 1994 MatDiagonalScale_MPIBAIJ, 1995 MatNorm_MPIBAIJ, 1996 /*20*/ MatAssemblyBegin_MPIBAIJ, 1997 MatAssemblyEnd_MPIBAIJ, 1998 0, 1999 MatSetOption_MPIBAIJ, 2000 MatZeroEntries_MPIBAIJ, 2001 /*25*/ MatZeroRows_MPIBAIJ, 2002 0, 2003 0, 2004 0, 2005 0, 2006 /*30*/ MatSetUpPreallocation_MPIBAIJ, 2007 0, 2008 0, 2009 0, 2010 0, 2011 /*35*/ MatDuplicate_MPIBAIJ, 2012 0, 2013 0, 2014 0, 2015 0, 2016 /*40*/ 0, 2017 MatGetSubMatrices_MPIBAIJ, 2018 MatIncreaseOverlap_MPIBAIJ, 2019 MatGetValues_MPIBAIJ, 2020 0, 2021 /*45*/ MatPrintHelp_MPIBAIJ, 2022 MatScale_MPIBAIJ, 2023 0, 2024 0, 2025 0, 2026 /*50*/ 0, 2027 0, 2028 0, 2029 0, 2030 0, 2031 /*55*/ 0, 2032 0, 2033 MatSetUnfactored_MPIBAIJ, 2034 0, 2035 MatSetValuesBlocked_MPIBAIJ, 2036 /*60*/ 0, 2037 MatDestroy_MPIBAIJ, 2038 MatView_MPIBAIJ, 2039 MatGetPetscMaps_Petsc, 2040 0, 2041 /*65*/ 0, 2042 0, 2043 0, 2044 0, 2045 0, 2046 /*70*/ MatGetRowMax_MPIBAIJ, 2047 0, 2048 0, 2049 0, 2050 0, 2051 /*75*/ 0, 2052 0, 2053 0, 2054 0, 2055 0, 2056 /*80*/ 0, 2057 0, 2058 0, 2059 0, 2060 MatLoad_MPIBAIJ, 2061 /*85*/ 0, 2062 0, 2063 0, 2064 0, 2065 0, 2066 /*90*/ 0, 2067 0, 2068 0, 2069 0, 2070 0, 2071 /*95*/ 0, 2072 0, 2073 0, 2074 0}; 2075 2076 2077 EXTERN_C_BEGIN 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2080 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2081 { 2082 PetscFunctionBegin; 2083 *a = ((Mat_MPIBAIJ *)A->data)->A; 2084 *iscopy = PETSC_FALSE; 2085 PetscFunctionReturn(0); 2086 } 2087 EXTERN_C_END 2088 2089 EXTERN_C_BEGIN 2090 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 2091 EXTERN_C_END 2092 2093 #undef __FUNCT__ 2094 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2095 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2096 { 2097 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2098 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2099 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2100 const PetscInt *JJ; 2101 PetscScalar *values; 2102 PetscErrorCode ierr; 2103 2104 PetscFunctionBegin; 2105 #if defined(PETSC_OPT_g) 2106 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2107 #endif 2108 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2109 o_nnz = d_nnz + m; 2110 2111 for (i=0; i<m; i++) { 2112 nnz = I[i+1]- I[i]; 2113 JJ = J + I[i]; 2114 nnz_max = PetscMax(nnz_max,nnz); 2115 #if defined(PETSC_OPT_g) 2116 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2117 #endif 2118 for (j=0; j<nnz; j++) { 2119 if (*JJ >= cstart) break; 2120 JJ++; 2121 } 2122 d = 0; 2123 for (; j<nnz; j++) { 2124 if (*JJ++ >= cend) break; 2125 d++; 2126 } 2127 d_nnz[i] = d; 2128 o_nnz[i] = nnz - d; 2129 } 2130 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2131 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2132 2133 if (v) values = (PetscScalar*)v; 2134 else { 2135 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2136 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2137 } 2138 2139 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2140 for (i=0; i<m; i++) { 2141 ii = i + rstart; 2142 nnz = I[i+1]- I[i]; 2143 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2144 } 2145 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2146 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2147 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2148 2149 if (!v) { 2150 ierr = PetscFree(values);CHKERRQ(ierr); 2151 } 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2157 /*@C 2158 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2159 (the default parallel PETSc format). 2160 2161 Collective on MPI_Comm 2162 2163 Input Parameters: 2164 + A - the matrix 2165 . i - the indices into j for the start of each local row (starts with zero) 2166 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2167 - v - optional values in the matrix 2168 2169 Level: developer 2170 2171 .keywords: matrix, aij, compressed row, sparse, parallel 2172 2173 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2174 @*/ 2175 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2176 { 2177 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2178 2179 PetscFunctionBegin; 2180 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2181 if (f) { 2182 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2183 } 2184 PetscFunctionReturn(0); 2185 } 2186 2187 EXTERN_C_BEGIN 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2190 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2191 { 2192 Mat_MPIBAIJ *b; 2193 PetscErrorCode ierr; 2194 PetscInt i; 2195 2196 PetscFunctionBegin; 2197 B->preallocated = PETSC_TRUE; 2198 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2199 2200 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2201 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2202 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2203 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2204 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2205 if (d_nnz) { 2206 for (i=0; i<B->m/bs; i++) { 2207 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2208 } 2209 } 2210 if (o_nnz) { 2211 for (i=0; i<B->m/bs; i++) { 2212 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2213 } 2214 } 2215 2216 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2217 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2218 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2219 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2220 2221 b = (Mat_MPIBAIJ*)B->data; 2222 B->bs = bs; 2223 b->bs2 = bs*bs; 2224 b->mbs = B->m/bs; 2225 b->nbs = B->n/bs; 2226 b->Mbs = B->M/bs; 2227 b->Nbs = B->N/bs; 2228 2229 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2230 b->rowners[0] = 0; 2231 for (i=2; i<=b->size; i++) { 2232 b->rowners[i] += b->rowners[i-1]; 2233 } 2234 b->rstart = b->rowners[b->rank]; 2235 b->rend = b->rowners[b->rank+1]; 2236 2237 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2238 b->cowners[0] = 0; 2239 for (i=2; i<=b->size; i++) { 2240 b->cowners[i] += b->cowners[i-1]; 2241 } 2242 b->cstart = b->cowners[b->rank]; 2243 b->cend = b->cowners[b->rank+1]; 2244 2245 for (i=0; i<=b->size; i++) { 2246 b->rowners_bs[i] = b->rowners[i]*bs; 2247 } 2248 b->rstart_bs = b->rstart*bs; 2249 b->rend_bs = b->rend*bs; 2250 b->cstart_bs = b->cstart*bs; 2251 b->cend_bs = b->cend*bs; 2252 2253 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2254 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2255 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2256 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2257 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2258 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2259 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2260 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2261 2262 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2263 2264 PetscFunctionReturn(0); 2265 } 2266 EXTERN_C_END 2267 2268 EXTERN_C_BEGIN 2269 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2270 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2271 EXTERN_C_END 2272 2273 /*MC 2274 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2275 2276 Options Database Keys: 2277 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2278 2279 Level: beginner 2280 2281 .seealso: MatCreateMPIBAIJ 2282 M*/ 2283 2284 EXTERN_C_BEGIN 2285 #undef __FUNCT__ 2286 #define __FUNCT__ "MatCreate_MPIBAIJ" 2287 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2288 { 2289 Mat_MPIBAIJ *b; 2290 PetscErrorCode ierr; 2291 PetscTruth flg; 2292 2293 PetscFunctionBegin; 2294 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2295 B->data = (void*)b; 2296 2297 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2298 B->mapping = 0; 2299 B->factor = 0; 2300 B->assembled = PETSC_FALSE; 2301 2302 B->insertmode = NOT_SET_VALUES; 2303 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2304 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2305 2306 /* build local table of row and column ownerships */ 2307 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2308 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2309 b->cowners = b->rowners + b->size + 2; 2310 b->rowners_bs = b->cowners + b->size + 2; 2311 2312 /* build cache for off array entries formed */ 2313 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2314 b->donotstash = PETSC_FALSE; 2315 b->colmap = PETSC_NULL; 2316 b->garray = PETSC_NULL; 2317 b->roworiented = PETSC_TRUE; 2318 2319 #if defined(PETSC_USE_MAT_SINGLE) 2320 /* stuff for MatSetValues_XXX in single precision */ 2321 b->setvalueslen = 0; 2322 b->setvaluescopy = PETSC_NULL; 2323 #endif 2324 2325 /* stuff used in block assembly */ 2326 b->barray = 0; 2327 2328 /* stuff used for matrix vector multiply */ 2329 b->lvec = 0; 2330 b->Mvctx = 0; 2331 2332 /* stuff for MatGetRow() */ 2333 b->rowindices = 0; 2334 b->rowvalues = 0; 2335 b->getrowactive = PETSC_FALSE; 2336 2337 /* hash table stuff */ 2338 b->ht = 0; 2339 b->hd = 0; 2340 b->ht_size = 0; 2341 b->ht_flag = PETSC_FALSE; 2342 b->ht_fact = 0; 2343 b->ht_total_ct = 0; 2344 b->ht_insert_ct = 0; 2345 2346 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2347 if (flg) { 2348 PetscReal fact = 1.39; 2349 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2350 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2351 if (fact <= 1.0) fact = 1.39; 2352 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2353 ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 2354 } 2355 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2356 "MatStoreValues_MPIBAIJ", 2357 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2358 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2359 "MatRetrieveValues_MPIBAIJ", 2360 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2361 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2362 "MatGetDiagonalBlock_MPIBAIJ", 2363 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2364 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2365 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2366 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2367 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2368 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2369 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2370 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2371 "MatDiagonalScaleLocal_MPIBAIJ", 2372 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2374 "MatSetHashTableFactor_MPIBAIJ", 2375 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 EXTERN_C_END 2379 2380 /*MC 2381 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2382 2383 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2384 and MATMPIBAIJ otherwise. 2385 2386 Options Database Keys: 2387 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2388 2389 Level: beginner 2390 2391 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2392 M*/ 2393 2394 EXTERN_C_BEGIN 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatCreate_BAIJ" 2397 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2398 { 2399 PetscErrorCode ierr; 2400 PetscMPIInt size; 2401 2402 PetscFunctionBegin; 2403 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2404 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2405 if (size == 1) { 2406 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2407 } else { 2408 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2409 } 2410 PetscFunctionReturn(0); 2411 } 2412 EXTERN_C_END 2413 2414 #undef __FUNCT__ 2415 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2416 /*@C 2417 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2418 (block compressed row). For good matrix assembly performance 2419 the user should preallocate the matrix storage by setting the parameters 2420 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2421 performance can be increased by more than a factor of 50. 2422 2423 Collective on Mat 2424 2425 Input Parameters: 2426 + A - the matrix 2427 . bs - size of blockk 2428 . d_nz - number of block nonzeros per block row in diagonal portion of local 2429 submatrix (same for all local rows) 2430 . d_nnz - array containing the number of block nonzeros in the various block rows 2431 of the in diagonal portion of the local (possibly different for each block 2432 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2433 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2434 submatrix (same for all local rows). 2435 - o_nnz - array containing the number of nonzeros in the various block rows of the 2436 off-diagonal portion of the local submatrix (possibly different for 2437 each block row) or PETSC_NULL. 2438 2439 If the *_nnz parameter is given then the *_nz parameter is ignored 2440 2441 Options Database Keys: 2442 . -mat_no_unroll - uses code that does not unroll the loops in the 2443 block calculations (much slower) 2444 . -mat_block_size - size of the blocks to use 2445 2446 Notes: 2447 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2448 than it must be used on all processors that share the object for that argument. 2449 2450 Storage Information: 2451 For a square global matrix we define each processor's diagonal portion 2452 to be its local rows and the corresponding columns (a square submatrix); 2453 each processor's off-diagonal portion encompasses the remainder of the 2454 local matrix (a rectangular submatrix). 2455 2456 The user can specify preallocated storage for the diagonal part of 2457 the local submatrix with either d_nz or d_nnz (not both). Set 2458 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2459 memory allocation. Likewise, specify preallocated storage for the 2460 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2461 2462 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2463 the figure below we depict these three local rows and all columns (0-11). 2464 2465 .vb 2466 0 1 2 3 4 5 6 7 8 9 10 11 2467 ------------------- 2468 row 3 | o o o d d d o o o o o o 2469 row 4 | o o o d d d o o o o o o 2470 row 5 | o o o d d d o o o o o o 2471 ------------------- 2472 .ve 2473 2474 Thus, any entries in the d locations are stored in the d (diagonal) 2475 submatrix, and any entries in the o locations are stored in the 2476 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2477 stored simply in the MATSEQBAIJ format for compressed row storage. 2478 2479 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2480 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2481 In general, for PDE problems in which most nonzeros are near the diagonal, 2482 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2483 or you will get TERRIBLE performance; see the users' manual chapter on 2484 matrices. 2485 2486 Level: intermediate 2487 2488 .keywords: matrix, block, aij, compressed row, sparse, parallel 2489 2490 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2491 @*/ 2492 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2493 { 2494 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2495 2496 PetscFunctionBegin; 2497 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2498 if (f) { 2499 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2500 } 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatCreateMPIBAIJ" 2506 /*@C 2507 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2508 (block compressed row). For good matrix assembly performance 2509 the user should preallocate the matrix storage by setting the parameters 2510 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2511 performance can be increased by more than a factor of 50. 2512 2513 Collective on MPI_Comm 2514 2515 Input Parameters: 2516 + comm - MPI communicator 2517 . bs - size of blockk 2518 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2519 This value should be the same as the local size used in creating the 2520 y vector for the matrix-vector product y = Ax. 2521 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2522 This value should be the same as the local size used in creating the 2523 x vector for the matrix-vector product y = Ax. 2524 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2525 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2526 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2527 submatrix (same for all local rows) 2528 . d_nnz - array containing the number of nonzero blocks in the various block rows 2529 of the in diagonal portion of the local (possibly different for each block 2530 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2531 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2532 submatrix (same for all local rows). 2533 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2534 off-diagonal portion of the local submatrix (possibly different for 2535 each block row) or PETSC_NULL. 2536 2537 Output Parameter: 2538 . A - the matrix 2539 2540 Options Database Keys: 2541 . -mat_no_unroll - uses code that does not unroll the loops in the 2542 block calculations (much slower) 2543 . -mat_block_size - size of the blocks to use 2544 2545 Notes: 2546 If the *_nnz parameter is given then the *_nz parameter is ignored 2547 2548 A nonzero block is any block that as 1 or more nonzeros in it 2549 2550 The user MUST specify either the local or global matrix dimensions 2551 (possibly both). 2552 2553 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2554 than it must be used on all processors that share the object for that argument. 2555 2556 Storage Information: 2557 For a square global matrix we define each processor's diagonal portion 2558 to be its local rows and the corresponding columns (a square submatrix); 2559 each processor's off-diagonal portion encompasses the remainder of the 2560 local matrix (a rectangular submatrix). 2561 2562 The user can specify preallocated storage for the diagonal part of 2563 the local submatrix with either d_nz or d_nnz (not both). Set 2564 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2565 memory allocation. Likewise, specify preallocated storage for the 2566 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2567 2568 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2569 the figure below we depict these three local rows and all columns (0-11). 2570 2571 .vb 2572 0 1 2 3 4 5 6 7 8 9 10 11 2573 ------------------- 2574 row 3 | o o o d d d o o o o o o 2575 row 4 | o o o d d d o o o o o o 2576 row 5 | o o o d d d o o o o o o 2577 ------------------- 2578 .ve 2579 2580 Thus, any entries in the d locations are stored in the d (diagonal) 2581 submatrix, and any entries in the o locations are stored in the 2582 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2583 stored simply in the MATSEQBAIJ format for compressed row storage. 2584 2585 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2586 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2587 In general, for PDE problems in which most nonzeros are near the diagonal, 2588 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2589 or you will get TERRIBLE performance; see the users' manual chapter on 2590 matrices. 2591 2592 Level: intermediate 2593 2594 .keywords: matrix, block, aij, compressed row, sparse, parallel 2595 2596 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2597 @*/ 2598 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2599 { 2600 PetscErrorCode ierr; 2601 PetscMPIInt size; 2602 2603 PetscFunctionBegin; 2604 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2605 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2606 if (size > 1) { 2607 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2608 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2609 } else { 2610 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2611 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2612 } 2613 PetscFunctionReturn(0); 2614 } 2615 2616 #undef __FUNCT__ 2617 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2618 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2619 { 2620 Mat mat; 2621 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2622 PetscErrorCode ierr; 2623 PetscInt len=0; 2624 2625 PetscFunctionBegin; 2626 *newmat = 0; 2627 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2628 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2629 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2630 2631 mat->factor = matin->factor; 2632 mat->preallocated = PETSC_TRUE; 2633 mat->assembled = PETSC_TRUE; 2634 mat->insertmode = NOT_SET_VALUES; 2635 2636 a = (Mat_MPIBAIJ*)mat->data; 2637 mat->bs = matin->bs; 2638 a->bs2 = oldmat->bs2; 2639 a->mbs = oldmat->mbs; 2640 a->nbs = oldmat->nbs; 2641 a->Mbs = oldmat->Mbs; 2642 a->Nbs = oldmat->Nbs; 2643 2644 a->rstart = oldmat->rstart; 2645 a->rend = oldmat->rend; 2646 a->cstart = oldmat->cstart; 2647 a->cend = oldmat->cend; 2648 a->size = oldmat->size; 2649 a->rank = oldmat->rank; 2650 a->donotstash = oldmat->donotstash; 2651 a->roworiented = oldmat->roworiented; 2652 a->rowindices = 0; 2653 a->rowvalues = 0; 2654 a->getrowactive = PETSC_FALSE; 2655 a->barray = 0; 2656 a->rstart_bs = oldmat->rstart_bs; 2657 a->rend_bs = oldmat->rend_bs; 2658 a->cstart_bs = oldmat->cstart_bs; 2659 a->cend_bs = oldmat->cend_bs; 2660 2661 /* hash table stuff */ 2662 a->ht = 0; 2663 a->hd = 0; 2664 a->ht_size = 0; 2665 a->ht_flag = oldmat->ht_flag; 2666 a->ht_fact = oldmat->ht_fact; 2667 a->ht_total_ct = 0; 2668 a->ht_insert_ct = 0; 2669 2670 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2671 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2672 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2673 if (oldmat->colmap) { 2674 #if defined (PETSC_USE_CTABLE) 2675 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2676 #else 2677 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2678 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2679 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2680 #endif 2681 } else a->colmap = 0; 2682 2683 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2684 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2685 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2686 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2687 } else a->garray = 0; 2688 2689 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2690 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2691 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2692 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2693 2694 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2695 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2696 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2697 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2698 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2699 *newmat = mat; 2700 2701 PetscFunctionReturn(0); 2702 } 2703 2704 #include "petscsys.h" 2705 2706 #undef __FUNCT__ 2707 #define __FUNCT__ "MatLoad_MPIBAIJ" 2708 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2709 { 2710 Mat A; 2711 PetscErrorCode ierr; 2712 int fd; 2713 PetscInt i,nz,j,rstart,rend; 2714 PetscScalar *vals,*buf; 2715 MPI_Comm comm = ((PetscObject)viewer)->comm; 2716 MPI_Status status; 2717 PetscMPIInt rank,size,maxnz; 2718 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2719 PetscInt *locrowlens,*procsnz = 0,*browners; 2720 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2721 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2722 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2723 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2724 2725 PetscFunctionBegin; 2726 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2727 2728 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2729 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2730 if (!rank) { 2731 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2732 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2733 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2734 } 2735 2736 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2737 M = header[1]; N = header[2]; 2738 2739 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2740 2741 /* 2742 This code adds extra rows to make sure the number of rows is 2743 divisible by the blocksize 2744 */ 2745 Mbs = M/bs; 2746 extra_rows = bs - M + bs*Mbs; 2747 if (extra_rows == bs) extra_rows = 0; 2748 else Mbs++; 2749 if (extra_rows && !rank) { 2750 ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2751 } 2752 2753 /* determine ownership of all rows */ 2754 mbs = Mbs/size + ((Mbs % size) > rank); 2755 m = mbs*bs; 2756 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2757 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2758 2759 /* process 0 needs enough room for process with most rows */ 2760 if (!rank) { 2761 mmax = rowners[1]; 2762 for (i=2; i<size; i++) { 2763 mmax = PetscMax(mmax,rowners[i]); 2764 } 2765 mmax*=bs; 2766 } else mmax = m; 2767 2768 rowners[0] = 0; 2769 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2770 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2771 rstart = rowners[rank]; 2772 rend = rowners[rank+1]; 2773 2774 /* distribute row lengths to all processors */ 2775 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2776 if (!rank) { 2777 mend = m; 2778 if (size == 1) mend = mend - extra_rows; 2779 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2780 for (j=mend; j<m; j++) locrowlens[j] = 1; 2781 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2782 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2783 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2784 for (j=0; j<m; j++) { 2785 procsnz[0] += locrowlens[j]; 2786 } 2787 for (i=1; i<size; i++) { 2788 mend = browners[i+1] - browners[i]; 2789 if (i == size-1) mend = mend - extra_rows; 2790 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2791 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2792 /* calculate the number of nonzeros on each processor */ 2793 for (j=0; j<browners[i+1]-browners[i]; j++) { 2794 procsnz[i] += rowlengths[j]; 2795 } 2796 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2797 } 2798 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2799 } else { 2800 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2801 } 2802 2803 if (!rank) { 2804 /* determine max buffer needed and allocate it */ 2805 maxnz = procsnz[0]; 2806 for (i=1; i<size; i++) { 2807 maxnz = PetscMax(maxnz,procsnz[i]); 2808 } 2809 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2810 2811 /* read in my part of the matrix column indices */ 2812 nz = procsnz[0]; 2813 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2814 mycols = ibuf; 2815 if (size == 1) nz -= extra_rows; 2816 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2817 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2818 2819 /* read in every ones (except the last) and ship off */ 2820 for (i=1; i<size-1; i++) { 2821 nz = procsnz[i]; 2822 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2823 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2824 } 2825 /* read in the stuff for the last proc */ 2826 if (size != 1) { 2827 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2828 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2829 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2830 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2831 } 2832 ierr = PetscFree(cols);CHKERRQ(ierr); 2833 } else { 2834 /* determine buffer space needed for message */ 2835 nz = 0; 2836 for (i=0; i<m; i++) { 2837 nz += locrowlens[i]; 2838 } 2839 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2840 mycols = ibuf; 2841 /* receive message of column indices*/ 2842 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2843 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2844 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2845 } 2846 2847 /* loop over local rows, determining number of off diagonal entries */ 2848 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2849 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2850 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2851 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2852 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2853 rowcount = 0; nzcount = 0; 2854 for (i=0; i<mbs; i++) { 2855 dcount = 0; 2856 odcount = 0; 2857 for (j=0; j<bs; j++) { 2858 kmax = locrowlens[rowcount]; 2859 for (k=0; k<kmax; k++) { 2860 tmp = mycols[nzcount++]/bs; 2861 if (!mask[tmp]) { 2862 mask[tmp] = 1; 2863 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2864 else masked1[dcount++] = tmp; 2865 } 2866 } 2867 rowcount++; 2868 } 2869 2870 dlens[i] = dcount; 2871 odlens[i] = odcount; 2872 2873 /* zero out the mask elements we set */ 2874 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2875 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2876 } 2877 2878 /* create our matrix */ 2879 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2880 ierr = MatSetType(A,type);CHKERRQ(ierr) 2881 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2882 2883 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2884 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2885 2886 if (!rank) { 2887 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2888 /* read in my part of the matrix numerical values */ 2889 nz = procsnz[0]; 2890 vals = buf; 2891 mycols = ibuf; 2892 if (size == 1) nz -= extra_rows; 2893 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2894 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2895 2896 /* insert into matrix */ 2897 jj = rstart*bs; 2898 for (i=0; i<m; i++) { 2899 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2900 mycols += locrowlens[i]; 2901 vals += locrowlens[i]; 2902 jj++; 2903 } 2904 /* read in other processors (except the last one) and ship out */ 2905 for (i=1; i<size-1; i++) { 2906 nz = procsnz[i]; 2907 vals = buf; 2908 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2909 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2910 } 2911 /* the last proc */ 2912 if (size != 1){ 2913 nz = procsnz[i] - extra_rows; 2914 vals = buf; 2915 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2916 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2917 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2918 } 2919 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2920 } else { 2921 /* receive numeric values */ 2922 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2923 2924 /* receive message of values*/ 2925 vals = buf; 2926 mycols = ibuf; 2927 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2928 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2929 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2930 2931 /* insert into matrix */ 2932 jj = rstart*bs; 2933 for (i=0; i<m; i++) { 2934 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2935 mycols += locrowlens[i]; 2936 vals += locrowlens[i]; 2937 jj++; 2938 } 2939 } 2940 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2941 ierr = PetscFree(buf);CHKERRQ(ierr); 2942 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2943 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2944 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2945 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2946 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2947 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2948 2949 *newmat = A; 2950 PetscFunctionReturn(0); 2951 } 2952 2953 #undef __FUNCT__ 2954 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2955 /*@ 2956 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2957 2958 Input Parameters: 2959 . mat - the matrix 2960 . fact - factor 2961 2962 Collective on Mat 2963 2964 Level: advanced 2965 2966 Notes: 2967 This can also be set by the command line option: -mat_use_hash_table fact 2968 2969 .keywords: matrix, hashtable, factor, HT 2970 2971 .seealso: MatSetOption() 2972 @*/ 2973 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2974 { 2975 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2976 2977 PetscFunctionBegin; 2978 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2979 if (f) { 2980 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2981 } 2982 PetscFunctionReturn(0); 2983 } 2984 2985 EXTERN_C_BEGIN 2986 #undef __FUNCT__ 2987 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2988 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2989 { 2990 Mat_MPIBAIJ *baij; 2991 2992 PetscFunctionBegin; 2993 baij = (Mat_MPIBAIJ*)mat->data; 2994 baij->ht_fact = fact; 2995 PetscFunctionReturn(0); 2996 } 2997 EXTERN_C_END 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3001 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3002 { 3003 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3004 PetscFunctionBegin; 3005 *Ad = a->A; 3006 *Ao = a->B; 3007 *colmap = a->garray; 3008 PetscFunctionReturn(0); 3009 } 3010