1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 16 17 /* UGLY, ugly, ugly 18 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 19 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 20 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 21 converts the entries into single precision and then calls ..._MatScalar() to put them 22 into the single precision data structures. 23 */ 24 #if defined(PETSC_USE_MAT_SINGLE) 25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 30 #else 31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 32 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 34 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 36 #endif 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 41 { 42 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 PetscScalar *va,*vb; 46 Vec vtmp; 47 48 PetscFunctionBegin; 49 50 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 52 53 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 54 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 56 57 for (i=0; i<A->m; i++){ 58 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 59 } 60 61 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 62 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 64 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 PetscErrorCode ierr; 111 PetscInt nbs = B->nbs,i,bs=mat->bs; 112 113 PetscFunctionBegin; 114 #if defined (PETSC_USE_CTABLE) 115 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 116 for (i=0; i<nbs; i++){ 117 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 118 } 119 #else 120 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 121 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 123 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 124 #endif 125 PetscFunctionReturn(0); 126 } 127 128 #define CHUNKSIZE 10 129 130 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 131 { \ 132 \ 133 brow = row/bs; \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow]; nrow = ailen[brow]; \ 136 bcol = col/bs; \ 137 ridx = row % bs; cidx = col % bs; \ 138 low = 0; high = nrow; \ 139 while (high-low > 3) { \ 140 t = (low+high)/2; \ 141 if (rp[t] > bcol) high = t; \ 142 else low = t; \ 143 } \ 144 for (_i=low; _i<high; _i++) { \ 145 if (rp[_i] > bcol) break; \ 146 if (rp[_i] == bcol) { \ 147 bap = ap + bs2*_i + bs*cidx + ridx; \ 148 if (addv == ADD_VALUES) *bap += value; \ 149 else *bap = value; \ 150 goto a_noinsert; \ 151 } \ 152 } \ 153 if (a->nonew == 1) goto a_noinsert; \ 154 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 155 if (nrow >= rmax) { \ 156 /* there is no extra room in row, therefore enlarge */ \ 157 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 158 MatScalar *new_a; \ 159 \ 160 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 161 \ 162 /* malloc new storage space */ \ 163 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\ 164 \ 165 /* copy over old data into new slots */ \ 166 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 167 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 168 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 169 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 170 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 171 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 172 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 174 /* free up old matrix storage */ \ 175 if (!a->singlemalloc) {\ 176 ierr = PetscFree(a->a);CHKERRQ(ierr);\ 177 ierr = PetscFree(a->i);CHKERRQ(ierr);\ 178 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 179 } else {\ 180 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);\ 181 }\ 182 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 183 a->singlemalloc = PETSC_TRUE; \ 184 \ 185 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 186 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 187 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 188 a->maxnz += bs2*CHUNKSIZE; \ 189 a->reallocs++; \ 190 a->nz++; \ 191 } \ 192 N = nrow++ - 1; \ 193 /* shift up all the later entries in this row */ \ 194 for (ii=N; ii>=_i; ii--) { \ 195 rp[ii+1] = rp[ii]; \ 196 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 197 } \ 198 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 199 rp[_i] = bcol; \ 200 ap[bs2*_i + bs*cidx + ridx] = value; \ 201 a_noinsert:; \ 202 ailen[brow] = nrow; \ 203 } 204 205 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 206 { \ 207 brow = row/bs; \ 208 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 209 rmax = bimax[brow]; nrow = bilen[brow]; \ 210 bcol = col/bs; \ 211 ridx = row % bs; cidx = col % bs; \ 212 low = 0; high = nrow; \ 213 while (high-low > 3) { \ 214 t = (low+high)/2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i=low; _i<high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2*_i + bs*cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto b_noinsert; \ 225 } \ 226 } \ 227 if (b->nonew == 1) goto b_noinsert; \ 228 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 229 if (nrow >= rmax) { \ 230 /* there is no extra room in row, therefore enlarge */ \ 231 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 232 MatScalar *new_a; \ 233 \ 234 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 235 \ 236 /* malloc new storage space */ \ 237 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,b->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\ 238 \ 239 /* copy over old data into new slots */ \ 240 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 241 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 242 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 243 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 244 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 245 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 246 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 247 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 248 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 249 /* free up old matrix storage */ \ 250 if (!a->singlemalloc) {\ 251 ierr = PetscFree(b->a);CHKERRQ(ierr);\ 252 ierr = PetscFree(b->i);CHKERRQ(ierr);\ 253 ierr = PetscFree(b->j);CHKERRQ(ierr);\ 254 } else {\ 255 ierr = PetscFree3(b->a,b->i,b->j);CHKERRQ(ierr);\ 256 }\ 257 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 258 b->singlemalloc = PETSC_TRUE; \ 259 \ 260 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 261 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 262 ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 263 b->maxnz += bs2*CHUNKSIZE; \ 264 b->reallocs++; \ 265 b->nz++; \ 266 } \ 267 N = nrow++ - 1; \ 268 /* shift up all the later entries in this row */ \ 269 for (ii=N; ii>=_i; ii--) { \ 270 rp[ii+1] = rp[ii]; \ 271 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 272 } \ 273 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 274 rp[_i] = bcol; \ 275 ap[bs2*_i + bs*cidx + ridx] = value; \ 276 b_noinsert:; \ 277 bilen[brow] = nrow; \ 278 } 279 280 #if defined(PETSC_USE_MAT_SINGLE) 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatSetValues_MPIBAIJ" 283 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 284 { 285 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 286 PetscErrorCode ierr; 287 PetscInt i,N = m*n; 288 MatScalar *vsingle; 289 290 PetscFunctionBegin; 291 if (N > b->setvalueslen) { 292 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 293 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 294 b->setvalueslen = N; 295 } 296 vsingle = b->setvaluescopy; 297 298 for (i=0; i<N; i++) { 299 vsingle[i] = v[i]; 300 } 301 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 307 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 308 { 309 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 310 PetscErrorCode ierr; 311 PetscInt i,N = m*n*b->bs2; 312 MatScalar *vsingle; 313 314 PetscFunctionBegin; 315 if (N > b->setvalueslen) { 316 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 317 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 318 b->setvalueslen = N; 319 } 320 vsingle = b->setvaluescopy; 321 for (i=0; i<N; i++) { 322 vsingle[i] = v[i]; 323 } 324 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 330 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 331 { 332 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 333 PetscErrorCode ierr; 334 PetscInt i,N = m*n; 335 MatScalar *vsingle; 336 337 PetscFunctionBegin; 338 if (N > b->setvalueslen) { 339 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 340 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 341 b->setvalueslen = N; 342 } 343 vsingle = b->setvaluescopy; 344 for (i=0; i<N; i++) { 345 vsingle[i] = v[i]; 346 } 347 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNCT__ 352 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 353 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 354 { 355 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 356 PetscErrorCode ierr; 357 PetscInt i,N = m*n*b->bs2; 358 MatScalar *vsingle; 359 360 PetscFunctionBegin; 361 if (N > b->setvalueslen) { 362 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 363 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 364 b->setvalueslen = N; 365 } 366 vsingle = b->setvaluescopy; 367 for (i=0; i<N; i++) { 368 vsingle[i] = v[i]; 369 } 370 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 371 PetscFunctionReturn(0); 372 } 373 #endif 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 377 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 378 { 379 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 380 MatScalar value; 381 PetscTruth roworiented = baij->roworiented; 382 PetscErrorCode ierr; 383 PetscInt i,j,row,col; 384 PetscInt rstart_orig=baij->rstart_bs; 385 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 386 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 387 388 /* Some Variables required in the macro */ 389 Mat A = baij->A; 390 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 391 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 392 MatScalar *aa=a->a; 393 394 Mat B = baij->B; 395 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 396 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 397 MatScalar *ba=b->a; 398 399 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 400 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 401 MatScalar *ap,*bap; 402 403 PetscFunctionBegin; 404 for (i=0; i<m; i++) { 405 if (im[i] < 0) continue; 406 #if defined(PETSC_USE_DEBUG) 407 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 408 #endif 409 if (im[i] >= rstart_orig && im[i] < rend_orig) { 410 row = im[i] - rstart_orig; 411 for (j=0; j<n; j++) { 412 if (in[j] >= cstart_orig && in[j] < cend_orig){ 413 col = in[j] - cstart_orig; 414 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 415 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 416 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 417 } else if (in[j] < 0) continue; 418 #if defined(PETSC_USE_DEBUG) 419 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 420 #endif 421 else { 422 if (mat->was_assembled) { 423 if (!baij->colmap) { 424 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 425 } 426 #if defined (PETSC_USE_CTABLE) 427 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 428 col = col - 1; 429 #else 430 col = baij->colmap[in[j]/bs] - 1; 431 #endif 432 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 433 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 434 col = in[j]; 435 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 436 B = baij->B; 437 b = (Mat_SeqBAIJ*)(B)->data; 438 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 439 ba=b->a; 440 } else col += in[j]%bs; 441 } else col = in[j]; 442 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 443 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 444 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 445 } 446 } 447 } else { 448 if (!baij->donotstash) { 449 if (roworiented) { 450 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 451 } else { 452 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 453 } 454 } 455 } 456 } 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 const MatScalar *value; 466 MatScalar *barray=baij->barray; 467 PetscTruth roworiented = baij->roworiented; 468 PetscErrorCode ierr; 469 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 470 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 471 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 472 473 PetscFunctionBegin; 474 if(!barray) { 475 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 476 baij->barray = barray; 477 } 478 479 if (roworiented) { 480 stepval = (n-1)*bs; 481 } else { 482 stepval = (m-1)*bs; 483 } 484 for (i=0; i<m; i++) { 485 if (im[i] < 0) continue; 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 488 #endif 489 if (im[i] >= rstart && im[i] < rend) { 490 row = im[i] - rstart; 491 for (j=0; j<n; j++) { 492 /* If NumCol = 1 then a copy is not required */ 493 if ((roworiented) && (n == 1)) { 494 barray = (MatScalar*)v + i*bs2; 495 } else if((!roworiented) && (m == 1)) { 496 barray = (MatScalar*)v + j*bs2; 497 } else { /* Here a copy is required */ 498 if (roworiented) { 499 value = v + i*(stepval+bs)*bs + j*bs; 500 } else { 501 value = v + j*(stepval+bs)*bs + i*bs; 502 } 503 for (ii=0; ii<bs; ii++,value+=stepval) { 504 for (jj=0; jj<bs; jj++) { 505 *barray++ = *value++; 506 } 507 } 508 barray -=bs2; 509 } 510 511 if (in[j] >= cstart && in[j] < cend){ 512 col = in[j] - cstart; 513 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 514 } 515 else if (in[j] < 0) continue; 516 #if defined(PETSC_USE_DEBUG) 517 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 518 #endif 519 else { 520 if (mat->was_assembled) { 521 if (!baij->colmap) { 522 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 523 } 524 525 #if defined(PETSC_USE_DEBUG) 526 #if defined (PETSC_USE_CTABLE) 527 { PetscInt data; 528 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 529 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 530 } 531 #else 532 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 533 #endif 534 #endif 535 #if defined (PETSC_USE_CTABLE) 536 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 537 col = (col - 1)/bs; 538 #else 539 col = (baij->colmap[in[j]] - 1)/bs; 540 #endif 541 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 542 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 543 col = in[j]; 544 } 545 } 546 else col = in[j]; 547 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 548 } 549 } 550 } else { 551 if (!baij->donotstash) { 552 if (roworiented) { 553 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 554 } else { 555 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 556 } 557 } 558 } 559 } 560 PetscFunctionReturn(0); 561 } 562 563 #define HASH_KEY 0.6180339887 564 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 565 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 566 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 569 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 570 { 571 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 572 PetscTruth roworiented = baij->roworiented; 573 PetscErrorCode ierr; 574 PetscInt i,j,row,col; 575 PetscInt rstart_orig=baij->rstart_bs; 576 PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 577 PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 578 PetscReal tmp; 579 MatScalar **HD = baij->hd,value; 580 #if defined(PETSC_USE_DEBUG) 581 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 582 #endif 583 584 PetscFunctionBegin; 585 586 for (i=0; i<m; i++) { 587 #if defined(PETSC_USE_DEBUG) 588 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 589 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 590 #endif 591 row = im[i]; 592 if (row >= rstart_orig && row < rend_orig) { 593 for (j=0; j<n; j++) { 594 col = in[j]; 595 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 596 /* Look up PetscInto the Hash Table */ 597 key = (row/bs)*Nbs+(col/bs)+1; 598 h1 = HASH(size,key,tmp); 599 600 601 idx = h1; 602 #if defined(PETSC_USE_DEBUG) 603 insert_ct++; 604 total_ct++; 605 if (HT[idx] != key) { 606 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 607 if (idx == size) { 608 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 609 if (idx == h1) { 610 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 611 } 612 } 613 } 614 #else 615 if (HT[idx] != key) { 616 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 617 if (idx == size) { 618 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 619 if (idx == h1) { 620 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 621 } 622 } 623 } 624 #endif 625 /* A HASH table entry is found, so insert the values at the correct address */ 626 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 627 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 628 } 629 } else { 630 if (!baij->donotstash) { 631 if (roworiented) { 632 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 633 } else { 634 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 635 } 636 } 637 } 638 } 639 #if defined(PETSC_USE_DEBUG) 640 baij->ht_total_ct = total_ct; 641 baij->ht_insert_ct = insert_ct; 642 #endif 643 PetscFunctionReturn(0); 644 } 645 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 648 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 649 { 650 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 651 PetscTruth roworiented = baij->roworiented; 652 PetscErrorCode ierr; 653 PetscInt i,j,ii,jj,row,col; 654 PetscInt rstart=baij->rstart ; 655 PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 656 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 657 PetscReal tmp; 658 MatScalar **HD = baij->hd,*baij_a; 659 const MatScalar *v_t,*value; 660 #if defined(PETSC_USE_DEBUG) 661 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 662 #endif 663 664 PetscFunctionBegin; 665 666 if (roworiented) { 667 stepval = (n-1)*bs; 668 } else { 669 stepval = (m-1)*bs; 670 } 671 for (i=0; i<m; i++) { 672 #if defined(PETSC_USE_DEBUG) 673 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 674 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 675 #endif 676 row = im[i]; 677 v_t = v + i*bs2; 678 if (row >= rstart && row < rend) { 679 for (j=0; j<n; j++) { 680 col = in[j]; 681 682 /* Look up into the Hash Table */ 683 key = row*Nbs+col+1; 684 h1 = HASH(size,key,tmp); 685 686 idx = h1; 687 #if defined(PETSC_USE_DEBUG) 688 total_ct++; 689 insert_ct++; 690 if (HT[idx] != key) { 691 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 692 if (idx == size) { 693 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 694 if (idx == h1) { 695 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 696 } 697 } 698 } 699 #else 700 if (HT[idx] != key) { 701 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 702 if (idx == size) { 703 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 704 if (idx == h1) { 705 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 706 } 707 } 708 } 709 #endif 710 baij_a = HD[idx]; 711 if (roworiented) { 712 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 713 /* value = v + (i*(stepval+bs)+j)*bs; */ 714 value = v_t; 715 v_t += bs; 716 if (addv == ADD_VALUES) { 717 for (ii=0; ii<bs; ii++,value+=stepval) { 718 for (jj=ii; jj<bs2; jj+=bs) { 719 baij_a[jj] += *value++; 720 } 721 } 722 } else { 723 for (ii=0; ii<bs; ii++,value+=stepval) { 724 for (jj=ii; jj<bs2; jj+=bs) { 725 baij_a[jj] = *value++; 726 } 727 } 728 } 729 } else { 730 value = v + j*(stepval+bs)*bs + i*bs; 731 if (addv == ADD_VALUES) { 732 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 733 for (jj=0; jj<bs; jj++) { 734 baij_a[jj] += *value++; 735 } 736 } 737 } else { 738 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 739 for (jj=0; jj<bs; jj++) { 740 baij_a[jj] = *value++; 741 } 742 } 743 } 744 } 745 } 746 } else { 747 if (!baij->donotstash) { 748 if (roworiented) { 749 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 750 } else { 751 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 752 } 753 } 754 } 755 } 756 #if defined(PETSC_USE_DEBUG) 757 baij->ht_total_ct = total_ct; 758 baij->ht_insert_ct = insert_ct; 759 #endif 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatGetValues_MPIBAIJ" 765 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 766 { 767 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 768 PetscErrorCode ierr; 769 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 770 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 771 772 PetscFunctionBegin; 773 for (i=0; i<m; i++) { 774 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 775 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 776 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 777 row = idxm[i] - bsrstart; 778 for (j=0; j<n; j++) { 779 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 780 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 781 if (idxn[j] >= bscstart && idxn[j] < bscend){ 782 col = idxn[j] - bscstart; 783 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 784 } else { 785 if (!baij->colmap) { 786 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 787 } 788 #if defined (PETSC_USE_CTABLE) 789 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 790 data --; 791 #else 792 data = baij->colmap[idxn[j]/bs]-1; 793 #endif 794 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 795 else { 796 col = data + idxn[j]%bs; 797 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 798 } 799 } 800 } 801 } else { 802 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 803 } 804 } 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatNorm_MPIBAIJ" 810 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 811 { 812 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 813 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 814 PetscErrorCode ierr; 815 PetscInt i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col; 816 PetscReal sum = 0.0; 817 MatScalar *v; 818 819 PetscFunctionBegin; 820 if (baij->size == 1) { 821 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 822 } else { 823 if (type == NORM_FROBENIUS) { 824 v = amat->a; 825 nz = amat->nz*bs2; 826 for (i=0; i<nz; i++) { 827 #if defined(PETSC_USE_COMPLEX) 828 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 829 #else 830 sum += (*v)*(*v); v++; 831 #endif 832 } 833 v = bmat->a; 834 nz = bmat->nz*bs2; 835 for (i=0; i<nz; i++) { 836 #if defined(PETSC_USE_COMPLEX) 837 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 838 #else 839 sum += (*v)*(*v); v++; 840 #endif 841 } 842 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 843 *nrm = sqrt(*nrm); 844 } else if (type == NORM_1) { /* max column sum */ 845 PetscReal *tmp,*tmp2; 846 PetscInt *jj,*garray=baij->garray,cstart=baij->cstart; 847 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 848 tmp2 = tmp + mat->N; 849 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 850 v = amat->a; jj = amat->j; 851 for (i=0; i<amat->nz; i++) { 852 for (j=0; j<bs; j++){ 853 col = bs*(cstart + *jj) + j; /* column index */ 854 for (row=0; row<bs; row++){ 855 tmp[col] += PetscAbsScalar(*v); v++; 856 } 857 } 858 jj++; 859 } 860 v = bmat->a; jj = bmat->j; 861 for (i=0; i<bmat->nz; i++) { 862 for (j=0; j<bs; j++){ 863 col = bs*garray[*jj] + j; 864 for (row=0; row<bs; row++){ 865 tmp[col] += PetscAbsScalar(*v); v++; 866 } 867 } 868 jj++; 869 } 870 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 871 *nrm = 0.0; 872 for (j=0; j<mat->N; j++) { 873 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 874 } 875 ierr = PetscFree(tmp);CHKERRQ(ierr); 876 } else if (type == NORM_INFINITY) { /* max row sum */ 877 PetscReal *sums; 878 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 879 sum = 0.0; 880 for (j=0; j<amat->mbs; j++) { 881 for (row=0; row<bs; row++) sums[row] = 0.0; 882 v = amat->a + bs2*amat->i[j]; 883 nz = amat->i[j+1]-amat->i[j]; 884 for (i=0; i<nz; i++) { 885 for (col=0; col<bs; col++){ 886 for (row=0; row<bs; row++){ 887 sums[row] += PetscAbsScalar(*v); v++; 888 } 889 } 890 } 891 v = bmat->a + bs2*bmat->i[j]; 892 nz = bmat->i[j+1]-bmat->i[j]; 893 for (i=0; i<nz; i++) { 894 for (col=0; col<bs; col++){ 895 for (row=0; row<bs; row++){ 896 sums[row] += PetscAbsScalar(*v); v++; 897 } 898 } 899 } 900 for (row=0; row<bs; row++){ 901 if (sums[row] > sum) sum = sums[row]; 902 } 903 } 904 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 905 ierr = PetscFree(sums);CHKERRQ(ierr); 906 } else { 907 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 908 } 909 } 910 PetscFunctionReturn(0); 911 } 912 913 /* 914 Creates the hash table, and sets the table 915 This table is created only once. 916 If new entried need to be added to the matrix 917 then the hash table has to be destroyed and 918 recreated. 919 */ 920 #undef __FUNCT__ 921 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 922 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 923 { 924 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 925 Mat A = baij->A,B=baij->B; 926 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 927 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 928 PetscErrorCode ierr; 929 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 930 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 931 PetscInt *HT,key; 932 MatScalar **HD; 933 PetscReal tmp; 934 #if defined(PETSC_USE_DEBUG) 935 PetscInt ct=0,max=0; 936 #endif 937 938 PetscFunctionBegin; 939 baij->ht_size=(PetscInt)(factor*nz); 940 size = baij->ht_size; 941 942 if (baij->ht) { 943 PetscFunctionReturn(0); 944 } 945 946 /* Allocate Memory for Hash Table */ 947 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 948 baij->ht = (PetscInt*)(baij->hd + size); 949 HD = baij->hd; 950 HT = baij->ht; 951 952 953 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 954 955 956 /* Loop Over A */ 957 for (i=0; i<a->mbs; i++) { 958 for (j=ai[i]; j<ai[i+1]; j++) { 959 row = i+rstart; 960 col = aj[j]+cstart; 961 962 key = row*Nbs + col + 1; 963 h1 = HASH(size,key,tmp); 964 for (k=0; k<size; k++){ 965 if (!HT[(h1+k)%size]) { 966 HT[(h1+k)%size] = key; 967 HD[(h1+k)%size] = a->a + j*bs2; 968 break; 969 #if defined(PETSC_USE_DEBUG) 970 } else { 971 ct++; 972 #endif 973 } 974 } 975 #if defined(PETSC_USE_DEBUG) 976 if (k> max) max = k; 977 #endif 978 } 979 } 980 /* Loop Over B */ 981 for (i=0; i<b->mbs; i++) { 982 for (j=bi[i]; j<bi[i+1]; j++) { 983 row = i+rstart; 984 col = garray[bj[j]]; 985 key = row*Nbs + col + 1; 986 h1 = HASH(size,key,tmp); 987 for (k=0; k<size; k++){ 988 if (!HT[(h1+k)%size]) { 989 HT[(h1+k)%size] = key; 990 HD[(h1+k)%size] = b->a + j*bs2; 991 break; 992 #if defined(PETSC_USE_DEBUG) 993 } else { 994 ct++; 995 #endif 996 } 997 } 998 #if defined(PETSC_USE_DEBUG) 999 if (k> max) max = k; 1000 #endif 1001 } 1002 } 1003 1004 /* Print Summary */ 1005 #if defined(PETSC_USE_DEBUG) 1006 for (i=0,j=0; i<size; i++) { 1007 if (HT[i]) {j++;} 1008 } 1009 ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr); 1010 #endif 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 1016 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 1017 { 1018 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1019 PetscErrorCode ierr; 1020 PetscInt nstash,reallocs; 1021 InsertMode addv; 1022 1023 PetscFunctionBegin; 1024 if (baij->donotstash) { 1025 PetscFunctionReturn(0); 1026 } 1027 1028 /* make sure all processors are either in INSERTMODE or ADDMODE */ 1029 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 1030 if (addv == (ADD_VALUES|INSERT_VALUES)) { 1031 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1032 } 1033 mat->insertmode = addv; /* in case this processor had no cache */ 1034 1035 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 1036 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 1037 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 1038 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 1039 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 1040 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 1046 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 1047 { 1048 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 1049 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 1050 PetscErrorCode ierr; 1051 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 1052 PetscInt *row,*col,other_disassembled; 1053 PetscTruth r1,r2,r3; 1054 MatScalar *val; 1055 InsertMode addv = mat->insertmode; 1056 PetscMPIInt n; 1057 1058 PetscFunctionBegin; 1059 if (!baij->donotstash) { 1060 while (1) { 1061 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1062 if (!flg) break; 1063 1064 for (i=0; i<n;) { 1065 /* Now identify the consecutive vals belonging to the same row */ 1066 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1067 if (j < n) ncols = j-i; 1068 else ncols = n-i; 1069 /* Now assemble all these values with a single function call */ 1070 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1071 i = j; 1072 } 1073 } 1074 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1075 /* Now process the block-stash. Since the values are stashed column-oriented, 1076 set the roworiented flag to column oriented, and after MatSetValues() 1077 restore the original flags */ 1078 r1 = baij->roworiented; 1079 r2 = a->roworiented; 1080 r3 = b->roworiented; 1081 baij->roworiented = PETSC_FALSE; 1082 a->roworiented = PETSC_FALSE; 1083 b->roworiented = PETSC_FALSE; 1084 while (1) { 1085 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1086 if (!flg) break; 1087 1088 for (i=0; i<n;) { 1089 /* Now identify the consecutive vals belonging to the same row */ 1090 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1091 if (j < n) ncols = j-i; 1092 else ncols = n-i; 1093 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1094 i = j; 1095 } 1096 } 1097 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1098 baij->roworiented = r1; 1099 a->roworiented = r2; 1100 b->roworiented = r3; 1101 } 1102 1103 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1104 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1105 1106 /* determine if any processor has disassembled, if so we must 1107 also disassemble ourselfs, in order that we may reassemble. */ 1108 /* 1109 if nonzero structure of submatrix B cannot change then we know that 1110 no processor disassembled thus we can skip this stuff 1111 */ 1112 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1113 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1114 if (mat->was_assembled && !other_disassembled) { 1115 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1116 } 1117 } 1118 1119 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1120 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1121 } 1122 b->compressedrow.use = PETSC_TRUE; 1123 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1124 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1125 1126 #if defined(PETSC_USE_DEBUG) 1127 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1128 ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr); 1129 baij->ht_total_ct = 0; 1130 baij->ht_insert_ct = 0; 1131 } 1132 #endif 1133 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1134 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1135 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1136 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1137 } 1138 1139 if (baij->rowvalues) { 1140 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1141 baij->rowvalues = 0; 1142 } 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1148 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1149 { 1150 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1151 PetscErrorCode ierr; 1152 PetscMPIInt size = baij->size,rank = baij->rank; 1153 PetscInt bs = mat->bs; 1154 PetscTruth iascii,isdraw; 1155 PetscViewer sviewer; 1156 PetscViewerFormat format; 1157 1158 PetscFunctionBegin; 1159 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1160 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1161 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1162 if (iascii) { 1163 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1164 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1165 MatInfo info; 1166 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1167 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1168 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1169 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1170 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1171 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1172 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1173 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1174 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1175 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1176 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1179 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1182 PetscFunctionReturn(0); 1183 } 1184 } 1185 1186 if (isdraw) { 1187 PetscDraw draw; 1188 PetscTruth isnull; 1189 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1190 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1191 } 1192 1193 if (size == 1) { 1194 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1195 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1196 } else { 1197 /* assemble the entire matrix onto first processor. */ 1198 Mat A; 1199 Mat_SeqBAIJ *Aloc; 1200 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1201 MatScalar *a; 1202 1203 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1204 /* Perhaps this should be the type of mat? */ 1205 if (!rank) { 1206 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1207 } else { 1208 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1209 } 1210 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1211 ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1212 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1213 1214 /* copy over the A part */ 1215 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1216 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1217 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1218 1219 for (i=0; i<mbs; i++) { 1220 rvals[0] = bs*(baij->rstart + i); 1221 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1222 for (j=ai[i]; j<ai[i+1]; j++) { 1223 col = (baij->cstart+aj[j])*bs; 1224 for (k=0; k<bs; k++) { 1225 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1226 col++; a += bs; 1227 } 1228 } 1229 } 1230 /* copy over the B part */ 1231 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1232 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1233 for (i=0; i<mbs; i++) { 1234 rvals[0] = bs*(baij->rstart + i); 1235 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1236 for (j=ai[i]; j<ai[i+1]; j++) { 1237 col = baij->garray[aj[j]]*bs; 1238 for (k=0; k<bs; k++) { 1239 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1240 col++; a += bs; 1241 } 1242 } 1243 } 1244 ierr = PetscFree(rvals);CHKERRQ(ierr); 1245 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1246 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1247 /* 1248 Everyone has to call to draw the matrix since the graphics waits are 1249 synchronized across all processors that share the PetscDraw object 1250 */ 1251 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1252 if (!rank) { 1253 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1254 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1255 } 1256 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1257 ierr = MatDestroy(A);CHKERRQ(ierr); 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatView_MPIBAIJ" 1264 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1265 { 1266 PetscErrorCode ierr; 1267 PetscTruth iascii,isdraw,issocket,isbinary; 1268 1269 PetscFunctionBegin; 1270 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1271 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1272 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1273 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1274 if (iascii || isdraw || issocket || isbinary) { 1275 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1276 } else { 1277 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1278 } 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1284 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1285 { 1286 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 #if defined(PETSC_USE_LOG) 1291 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1292 #endif 1293 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1294 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1295 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1296 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1297 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1298 #if defined (PETSC_USE_CTABLE) 1299 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1300 #else 1301 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1302 #endif 1303 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1304 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1305 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1306 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1307 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1308 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1309 #if defined(PETSC_USE_MAT_SINGLE) 1310 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1311 #endif 1312 ierr = PetscFree(baij);CHKERRQ(ierr); 1313 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1318 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1319 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1320 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatMult_MPIBAIJ" 1326 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1327 { 1328 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329 PetscErrorCode ierr; 1330 PetscInt nt; 1331 1332 PetscFunctionBegin; 1333 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1334 if (nt != A->n) { 1335 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1336 } 1337 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1338 if (nt != A->m) { 1339 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1340 } 1341 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1342 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1343 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1344 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1345 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1351 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1352 { 1353 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1354 PetscErrorCode ierr; 1355 1356 PetscFunctionBegin; 1357 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1358 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1359 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1360 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1366 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1367 { 1368 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1369 PetscErrorCode ierr; 1370 PetscTruth merged; 1371 1372 PetscFunctionBegin; 1373 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1374 /* do nondiagonal part */ 1375 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1376 if (!merged) { 1377 /* send it on its way */ 1378 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1379 /* do local part */ 1380 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1381 /* receive remote parts: note this assumes the values are not actually */ 1382 /* inserted in yy until the next line */ 1383 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1384 } else { 1385 /* do local part */ 1386 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1387 /* send it on its way */ 1388 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1389 /* values actually were received in the Begin() but we need to call this nop */ 1390 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1391 } 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1397 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1398 { 1399 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1400 PetscErrorCode ierr; 1401 1402 PetscFunctionBegin; 1403 /* do nondiagonal part */ 1404 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1405 /* send it on its way */ 1406 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1407 /* do local part */ 1408 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1409 /* receive remote parts: note this assumes the values are not actually */ 1410 /* inserted in yy until the next line, which is true for my implementation*/ 1411 /* but is not perhaps always true. */ 1412 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 /* 1417 This only works correctly for square matrices where the subblock A->A is the 1418 diagonal block 1419 */ 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1422 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1423 { 1424 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1425 PetscErrorCode ierr; 1426 1427 PetscFunctionBegin; 1428 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1429 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatScale_MPIBAIJ" 1435 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1436 { 1437 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1438 PetscErrorCode ierr; 1439 1440 PetscFunctionBegin; 1441 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1442 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1448 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1449 { 1450 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1451 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1452 PetscErrorCode ierr; 1453 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1454 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1455 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1456 1457 PetscFunctionBegin; 1458 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1459 mat->getrowactive = PETSC_TRUE; 1460 1461 if (!mat->rowvalues && (idx || v)) { 1462 /* 1463 allocate enough space to hold information from the longest row. 1464 */ 1465 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1466 PetscInt max = 1,mbs = mat->mbs,tmp; 1467 for (i=0; i<mbs; i++) { 1468 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1469 if (max < tmp) { max = tmp; } 1470 } 1471 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1472 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1473 } 1474 1475 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1476 lrow = row - brstart; 1477 1478 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1479 if (!v) {pvA = 0; pvB = 0;} 1480 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1481 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1482 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1483 nztot = nzA + nzB; 1484 1485 cmap = mat->garray; 1486 if (v || idx) { 1487 if (nztot) { 1488 /* Sort by increasing column numbers, assuming A and B already sorted */ 1489 PetscInt imark = -1; 1490 if (v) { 1491 *v = v_p = mat->rowvalues; 1492 for (i=0; i<nzB; i++) { 1493 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1494 else break; 1495 } 1496 imark = i; 1497 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1498 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1499 } 1500 if (idx) { 1501 *idx = idx_p = mat->rowindices; 1502 if (imark > -1) { 1503 for (i=0; i<imark; i++) { 1504 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1505 } 1506 } else { 1507 for (i=0; i<nzB; i++) { 1508 if (cmap[cworkB[i]/bs] < cstart) 1509 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1510 else break; 1511 } 1512 imark = i; 1513 } 1514 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1515 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1516 } 1517 } else { 1518 if (idx) *idx = 0; 1519 if (v) *v = 0; 1520 } 1521 } 1522 *nz = nztot; 1523 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1524 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1530 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1531 { 1532 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1533 1534 PetscFunctionBegin; 1535 if (!baij->getrowactive) { 1536 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1537 } 1538 baij->getrowactive = PETSC_FALSE; 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1544 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1545 { 1546 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1547 PetscErrorCode ierr; 1548 1549 PetscFunctionBegin; 1550 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1551 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1557 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1558 { 1559 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1560 Mat A = a->A,B = a->B; 1561 PetscErrorCode ierr; 1562 PetscReal isend[5],irecv[5]; 1563 1564 PetscFunctionBegin; 1565 info->block_size = (PetscReal)matin->bs; 1566 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1567 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1568 isend[3] = info->memory; isend[4] = info->mallocs; 1569 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1570 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1571 isend[3] += info->memory; isend[4] += info->mallocs; 1572 if (flag == MAT_LOCAL) { 1573 info->nz_used = isend[0]; 1574 info->nz_allocated = isend[1]; 1575 info->nz_unneeded = isend[2]; 1576 info->memory = isend[3]; 1577 info->mallocs = isend[4]; 1578 } else if (flag == MAT_GLOBAL_MAX) { 1579 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1580 info->nz_used = irecv[0]; 1581 info->nz_allocated = irecv[1]; 1582 info->nz_unneeded = irecv[2]; 1583 info->memory = irecv[3]; 1584 info->mallocs = irecv[4]; 1585 } else if (flag == MAT_GLOBAL_SUM) { 1586 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1587 info->nz_used = irecv[0]; 1588 info->nz_allocated = irecv[1]; 1589 info->nz_unneeded = irecv[2]; 1590 info->memory = irecv[3]; 1591 info->mallocs = irecv[4]; 1592 } else { 1593 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1594 } 1595 info->rows_global = (PetscReal)A->M; 1596 info->columns_global = (PetscReal)A->N; 1597 info->rows_local = (PetscReal)A->m; 1598 info->columns_local = (PetscReal)A->N; 1599 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1600 info->fill_ratio_needed = 0; 1601 info->factor_mallocs = 0; 1602 PetscFunctionReturn(0); 1603 } 1604 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1607 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1608 { 1609 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 switch (op) { 1614 case MAT_NO_NEW_NONZERO_LOCATIONS: 1615 case MAT_YES_NEW_NONZERO_LOCATIONS: 1616 case MAT_COLUMNS_UNSORTED: 1617 case MAT_COLUMNS_SORTED: 1618 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1619 case MAT_KEEP_ZEROED_ROWS: 1620 case MAT_NEW_NONZERO_LOCATION_ERR: 1621 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1622 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1623 break; 1624 case MAT_ROW_ORIENTED: 1625 a->roworiented = PETSC_TRUE; 1626 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1627 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1628 break; 1629 case MAT_ROWS_SORTED: 1630 case MAT_ROWS_UNSORTED: 1631 case MAT_YES_NEW_DIAGONALS: 1632 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1633 break; 1634 case MAT_COLUMN_ORIENTED: 1635 a->roworiented = PETSC_FALSE; 1636 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1637 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1638 break; 1639 case MAT_IGNORE_OFF_PROC_ENTRIES: 1640 a->donotstash = PETSC_TRUE; 1641 break; 1642 case MAT_NO_NEW_DIAGONALS: 1643 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1644 case MAT_USE_HASH_TABLE: 1645 a->ht_flag = PETSC_TRUE; 1646 break; 1647 case MAT_SYMMETRIC: 1648 case MAT_STRUCTURALLY_SYMMETRIC: 1649 case MAT_HERMITIAN: 1650 case MAT_SYMMETRY_ETERNAL: 1651 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1652 break; 1653 case MAT_NOT_SYMMETRIC: 1654 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1655 case MAT_NOT_HERMITIAN: 1656 case MAT_NOT_SYMMETRY_ETERNAL: 1657 break; 1658 default: 1659 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1666 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1667 { 1668 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1669 Mat_SeqBAIJ *Aloc; 1670 Mat B; 1671 PetscErrorCode ierr; 1672 PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1673 PetscInt bs=A->bs,mbs=baij->mbs; 1674 MatScalar *a; 1675 1676 PetscFunctionBegin; 1677 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1678 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1679 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1680 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1681 1682 /* copy over the A part */ 1683 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1684 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1685 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1686 1687 for (i=0; i<mbs; i++) { 1688 rvals[0] = bs*(baij->rstart + i); 1689 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1690 for (j=ai[i]; j<ai[i+1]; j++) { 1691 col = (baij->cstart+aj[j])*bs; 1692 for (k=0; k<bs; k++) { 1693 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1694 col++; a += bs; 1695 } 1696 } 1697 } 1698 /* copy over the B part */ 1699 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1700 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1701 for (i=0; i<mbs; i++) { 1702 rvals[0] = bs*(baij->rstart + i); 1703 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1704 for (j=ai[i]; j<ai[i+1]; j++) { 1705 col = baij->garray[aj[j]]*bs; 1706 for (k=0; k<bs; k++) { 1707 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1708 col++; a += bs; 1709 } 1710 } 1711 } 1712 ierr = PetscFree(rvals);CHKERRQ(ierr); 1713 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1714 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1715 1716 if (matout) { 1717 *matout = B; 1718 } else { 1719 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1720 } 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1726 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1727 { 1728 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1729 Mat a = baij->A,b = baij->B; 1730 PetscErrorCode ierr; 1731 PetscInt s1,s2,s3; 1732 1733 PetscFunctionBegin; 1734 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1735 if (rr) { 1736 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1737 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1738 /* Overlap communication with computation. */ 1739 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1740 } 1741 if (ll) { 1742 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1743 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1744 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1745 } 1746 /* scale the diagonal block */ 1747 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1748 1749 if (rr) { 1750 /* Do a scatter end and then right scale the off-diagonal block */ 1751 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1752 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1753 } 1754 1755 PetscFunctionReturn(0); 1756 } 1757 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1760 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1761 { 1762 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1763 PetscErrorCode ierr; 1764 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1765 PetscInt i,N,*rows,*owners = l->rowners; 1766 PetscInt *nprocs,j,idx,nsends,row; 1767 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1768 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1769 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1770 MPI_Comm comm = A->comm; 1771 MPI_Request *send_waits,*recv_waits; 1772 MPI_Status recv_status,*send_status; 1773 IS istmp; 1774 #if defined(PETSC_DEBUG) 1775 PetscTruth found = PETSC_FALSE; 1776 #endif 1777 1778 PetscFunctionBegin; 1779 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1780 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1781 1782 /* first count number of contributors to each processor */ 1783 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1784 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1785 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1786 j = 0; 1787 for (i=0; i<N; i++) { 1788 if (lastidx > (idx = rows[i])) j = 0; 1789 lastidx = idx; 1790 for (; j<size; j++) { 1791 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1792 nprocs[2*j]++; 1793 nprocs[2*j+1] = 1; 1794 owner[i] = j; 1795 #if defined(PETSC_DEBUG) 1796 found = PETSC_TRUE; 1797 #endif 1798 break; 1799 } 1800 } 1801 #if defined(PETSC_DEBUG) 1802 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1803 found = PETSC_FALSE; 1804 #endif 1805 } 1806 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1807 1808 /* inform other processors of number of messages and max length*/ 1809 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1810 1811 /* post receives: */ 1812 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1813 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1814 for (i=0; i<nrecvs; i++) { 1815 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1816 } 1817 1818 /* do sends: 1819 1) starts[i] gives the starting index in svalues for stuff going to 1820 the ith processor 1821 */ 1822 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1823 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1824 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1825 starts[0] = 0; 1826 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1827 for (i=0; i<N; i++) { 1828 svalues[starts[owner[i]]++] = rows[i]; 1829 } 1830 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1831 1832 starts[0] = 0; 1833 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1834 count = 0; 1835 for (i=0; i<size; i++) { 1836 if (nprocs[2*i+1]) { 1837 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1838 } 1839 } 1840 ierr = PetscFree(starts);CHKERRQ(ierr); 1841 1842 base = owners[rank]*bs; 1843 1844 /* wait on receives */ 1845 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1846 source = lens + nrecvs; 1847 count = nrecvs; slen = 0; 1848 while (count) { 1849 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1850 /* unpack receives into our local space */ 1851 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1852 source[imdex] = recv_status.MPI_SOURCE; 1853 lens[imdex] = n; 1854 slen += n; 1855 count--; 1856 } 1857 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1858 1859 /* move the data into the send scatter */ 1860 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1861 count = 0; 1862 for (i=0; i<nrecvs; i++) { 1863 values = rvalues + i*nmax; 1864 for (j=0; j<lens[i]; j++) { 1865 lrows[count++] = values[j] - base; 1866 } 1867 } 1868 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1869 ierr = PetscFree(lens);CHKERRQ(ierr); 1870 ierr = PetscFree(owner);CHKERRQ(ierr); 1871 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1872 1873 /* actually zap the local rows */ 1874 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1875 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 1876 1877 /* 1878 Zero the required rows. If the "diagonal block" of the matrix 1879 is square and the user wishes to set the diagonal we use seperate 1880 code so that MatSetValues() is not called for each diagonal allocating 1881 new memory, thus calling lots of mallocs and slowing things down. 1882 1883 Contributed by: Mathew Knepley 1884 */ 1885 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1886 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1887 if (diag && (l->A->M == l->A->N)) { 1888 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1889 } else if (diag) { 1890 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1891 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1892 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1893 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1894 } 1895 for (i=0; i<slen; i++) { 1896 row = lrows[i] + rstart_bs; 1897 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1898 } 1899 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1900 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1901 } else { 1902 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1903 } 1904 1905 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1906 ierr = PetscFree(lrows);CHKERRQ(ierr); 1907 1908 /* wait on sends */ 1909 if (nsends) { 1910 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1911 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1912 ierr = PetscFree(send_status);CHKERRQ(ierr); 1913 } 1914 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1915 ierr = PetscFree(svalues);CHKERRQ(ierr); 1916 1917 PetscFunctionReturn(0); 1918 } 1919 1920 #undef __FUNCT__ 1921 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1922 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1923 { 1924 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1925 MPI_Comm comm = A->comm; 1926 static PetscTruth called = PETSC_FALSE; 1927 PetscErrorCode ierr; 1928 1929 PetscFunctionBegin; 1930 if (!a->rank) { 1931 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1932 } 1933 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1934 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1935 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1941 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1942 { 1943 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatEqual_MPIBAIJ" 1955 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1956 { 1957 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1958 Mat a,b,c,d; 1959 PetscTruth flg; 1960 PetscErrorCode ierr; 1961 1962 PetscFunctionBegin; 1963 a = matA->A; b = matA->B; 1964 c = matB->A; d = matB->B; 1965 1966 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1967 if (flg) { 1968 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1969 } 1970 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1971 PetscFunctionReturn(0); 1972 } 1973 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1977 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1978 { 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1983 PetscFunctionReturn(0); 1984 } 1985 1986 /* -------------------------------------------------------------------*/ 1987 static struct _MatOps MatOps_Values = { 1988 MatSetValues_MPIBAIJ, 1989 MatGetRow_MPIBAIJ, 1990 MatRestoreRow_MPIBAIJ, 1991 MatMult_MPIBAIJ, 1992 /* 4*/ MatMultAdd_MPIBAIJ, 1993 MatMultTranspose_MPIBAIJ, 1994 MatMultTransposeAdd_MPIBAIJ, 1995 0, 1996 0, 1997 0, 1998 /*10*/ 0, 1999 0, 2000 0, 2001 0, 2002 MatTranspose_MPIBAIJ, 2003 /*15*/ MatGetInfo_MPIBAIJ, 2004 MatEqual_MPIBAIJ, 2005 MatGetDiagonal_MPIBAIJ, 2006 MatDiagonalScale_MPIBAIJ, 2007 MatNorm_MPIBAIJ, 2008 /*20*/ MatAssemblyBegin_MPIBAIJ, 2009 MatAssemblyEnd_MPIBAIJ, 2010 0, 2011 MatSetOption_MPIBAIJ, 2012 MatZeroEntries_MPIBAIJ, 2013 /*25*/ MatZeroRows_MPIBAIJ, 2014 0, 2015 0, 2016 0, 2017 0, 2018 /*30*/ MatSetUpPreallocation_MPIBAIJ, 2019 0, 2020 0, 2021 0, 2022 0, 2023 /*35*/ MatDuplicate_MPIBAIJ, 2024 0, 2025 0, 2026 0, 2027 0, 2028 /*40*/ 0, 2029 MatGetSubMatrices_MPIBAIJ, 2030 MatIncreaseOverlap_MPIBAIJ, 2031 MatGetValues_MPIBAIJ, 2032 0, 2033 /*45*/ MatPrintHelp_MPIBAIJ, 2034 MatScale_MPIBAIJ, 2035 0, 2036 0, 2037 0, 2038 /*50*/ 0, 2039 0, 2040 0, 2041 0, 2042 0, 2043 /*55*/ 0, 2044 0, 2045 MatSetUnfactored_MPIBAIJ, 2046 0, 2047 MatSetValuesBlocked_MPIBAIJ, 2048 /*60*/ 0, 2049 MatDestroy_MPIBAIJ, 2050 MatView_MPIBAIJ, 2051 MatGetPetscMaps_Petsc, 2052 0, 2053 /*65*/ 0, 2054 0, 2055 0, 2056 0, 2057 0, 2058 /*70*/ MatGetRowMax_MPIBAIJ, 2059 0, 2060 0, 2061 0, 2062 0, 2063 /*75*/ 0, 2064 0, 2065 0, 2066 0, 2067 0, 2068 /*80*/ 0, 2069 0, 2070 0, 2071 0, 2072 MatLoad_MPIBAIJ, 2073 /*85*/ 0, 2074 0, 2075 0, 2076 0, 2077 0, 2078 /*90*/ 0, 2079 0, 2080 0, 2081 0, 2082 0, 2083 /*95*/ 0, 2084 0, 2085 0, 2086 0}; 2087 2088 2089 EXTERN_C_BEGIN 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2092 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2093 { 2094 PetscFunctionBegin; 2095 *a = ((Mat_MPIBAIJ *)A->data)->A; 2096 *iscopy = PETSC_FALSE; 2097 PetscFunctionReturn(0); 2098 } 2099 EXTERN_C_END 2100 2101 EXTERN_C_BEGIN 2102 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 2103 EXTERN_C_END 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2107 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2108 { 2109 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2110 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2111 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2112 const PetscInt *JJ; 2113 PetscScalar *values; 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 #if defined(PETSC_OPT_g) 2118 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2119 #endif 2120 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2121 o_nnz = d_nnz + m; 2122 2123 for (i=0; i<m; i++) { 2124 nnz = I[i+1]- I[i]; 2125 JJ = J + I[i]; 2126 nnz_max = PetscMax(nnz_max,nnz); 2127 #if defined(PETSC_OPT_g) 2128 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2129 #endif 2130 for (j=0; j<nnz; j++) { 2131 if (*JJ >= cstart) break; 2132 JJ++; 2133 } 2134 d = 0; 2135 for (; j<nnz; j++) { 2136 if (*JJ++ >= cend) break; 2137 d++; 2138 } 2139 d_nnz[i] = d; 2140 o_nnz[i] = nnz - d; 2141 } 2142 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2143 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2144 2145 if (v) values = (PetscScalar*)v; 2146 else { 2147 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2148 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2149 } 2150 2151 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2152 for (i=0; i<m; i++) { 2153 ii = i + rstart; 2154 nnz = I[i+1]- I[i]; 2155 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2156 } 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2160 2161 if (!v) { 2162 ierr = PetscFree(values);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2169 /*@C 2170 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2171 (the default parallel PETSc format). 2172 2173 Collective on MPI_Comm 2174 2175 Input Parameters: 2176 + A - the matrix 2177 . i - the indices into j for the start of each local row (starts with zero) 2178 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2179 - v - optional values in the matrix 2180 2181 Level: developer 2182 2183 .keywords: matrix, aij, compressed row, sparse, parallel 2184 2185 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2186 @*/ 2187 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2188 { 2189 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2190 2191 PetscFunctionBegin; 2192 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2193 if (f) { 2194 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 EXTERN_C_BEGIN 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2202 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2203 { 2204 Mat_MPIBAIJ *b; 2205 PetscErrorCode ierr; 2206 PetscInt i; 2207 2208 PetscFunctionBegin; 2209 B->preallocated = PETSC_TRUE; 2210 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2211 2212 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2213 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2214 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2215 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2216 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2217 if (d_nnz) { 2218 for (i=0; i<B->m/bs; i++) { 2219 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2220 } 2221 } 2222 if (o_nnz) { 2223 for (i=0; i<B->m/bs; i++) { 2224 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2225 } 2226 } 2227 2228 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2229 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2230 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2231 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2232 2233 b = (Mat_MPIBAIJ*)B->data; 2234 B->bs = bs; 2235 b->bs2 = bs*bs; 2236 b->mbs = B->m/bs; 2237 b->nbs = B->n/bs; 2238 b->Mbs = B->M/bs; 2239 b->Nbs = B->N/bs; 2240 2241 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2242 b->rowners[0] = 0; 2243 for (i=2; i<=b->size; i++) { 2244 b->rowners[i] += b->rowners[i-1]; 2245 } 2246 b->rstart = b->rowners[b->rank]; 2247 b->rend = b->rowners[b->rank+1]; 2248 2249 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2250 b->cowners[0] = 0; 2251 for (i=2; i<=b->size; i++) { 2252 b->cowners[i] += b->cowners[i-1]; 2253 } 2254 b->cstart = b->cowners[b->rank]; 2255 b->cend = b->cowners[b->rank+1]; 2256 2257 for (i=0; i<=b->size; i++) { 2258 b->rowners_bs[i] = b->rowners[i]*bs; 2259 } 2260 b->rstart_bs = b->rstart*bs; 2261 b->rend_bs = b->rend*bs; 2262 b->cstart_bs = b->cstart*bs; 2263 b->cend_bs = b->cend*bs; 2264 2265 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2266 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2267 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2268 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2269 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2270 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2271 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2272 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2273 2274 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2275 2276 PetscFunctionReturn(0); 2277 } 2278 EXTERN_C_END 2279 2280 EXTERN_C_BEGIN 2281 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2282 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2283 EXTERN_C_END 2284 2285 /*MC 2286 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2287 2288 Options Database Keys: 2289 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2290 2291 Level: beginner 2292 2293 .seealso: MatCreateMPIBAIJ 2294 M*/ 2295 2296 EXTERN_C_BEGIN 2297 #undef __FUNCT__ 2298 #define __FUNCT__ "MatCreate_MPIBAIJ" 2299 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2300 { 2301 Mat_MPIBAIJ *b; 2302 PetscErrorCode ierr; 2303 PetscTruth flg; 2304 2305 PetscFunctionBegin; 2306 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2307 B->data = (void*)b; 2308 2309 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2310 B->mapping = 0; 2311 B->factor = 0; 2312 B->assembled = PETSC_FALSE; 2313 2314 B->insertmode = NOT_SET_VALUES; 2315 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2316 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2317 2318 /* build local table of row and column ownerships */ 2319 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2320 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2321 b->cowners = b->rowners + b->size + 2; 2322 b->rowners_bs = b->cowners + b->size + 2; 2323 2324 /* build cache for off array entries formed */ 2325 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2326 b->donotstash = PETSC_FALSE; 2327 b->colmap = PETSC_NULL; 2328 b->garray = PETSC_NULL; 2329 b->roworiented = PETSC_TRUE; 2330 2331 #if defined(PETSC_USE_MAT_SINGLE) 2332 /* stuff for MatSetValues_XXX in single precision */ 2333 b->setvalueslen = 0; 2334 b->setvaluescopy = PETSC_NULL; 2335 #endif 2336 2337 /* stuff used in block assembly */ 2338 b->barray = 0; 2339 2340 /* stuff used for matrix vector multiply */ 2341 b->lvec = 0; 2342 b->Mvctx = 0; 2343 2344 /* stuff for MatGetRow() */ 2345 b->rowindices = 0; 2346 b->rowvalues = 0; 2347 b->getrowactive = PETSC_FALSE; 2348 2349 /* hash table stuff */ 2350 b->ht = 0; 2351 b->hd = 0; 2352 b->ht_size = 0; 2353 b->ht_flag = PETSC_FALSE; 2354 b->ht_fact = 0; 2355 b->ht_total_ct = 0; 2356 b->ht_insert_ct = 0; 2357 2358 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2359 if (flg) { 2360 PetscReal fact = 1.39; 2361 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2362 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2363 if (fact <= 1.0) fact = 1.39; 2364 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2365 ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 2366 } 2367 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2368 "MatStoreValues_MPIBAIJ", 2369 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2370 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2371 "MatRetrieveValues_MPIBAIJ", 2372 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2374 "MatGetDiagonalBlock_MPIBAIJ", 2375 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2376 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2377 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2378 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2379 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2380 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2381 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2382 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2383 "MatDiagonalScaleLocal_MPIBAIJ", 2384 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2385 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2386 "MatSetHashTableFactor_MPIBAIJ", 2387 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 EXTERN_C_END 2391 2392 /*MC 2393 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2394 2395 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2396 and MATMPIBAIJ otherwise. 2397 2398 Options Database Keys: 2399 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2400 2401 Level: beginner 2402 2403 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2404 M*/ 2405 2406 EXTERN_C_BEGIN 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatCreate_BAIJ" 2409 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2410 { 2411 PetscErrorCode ierr; 2412 PetscMPIInt size; 2413 2414 PetscFunctionBegin; 2415 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2416 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2417 if (size == 1) { 2418 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2419 } else { 2420 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2421 } 2422 PetscFunctionReturn(0); 2423 } 2424 EXTERN_C_END 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2428 /*@C 2429 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2430 (block compressed row). For good matrix assembly performance 2431 the user should preallocate the matrix storage by setting the parameters 2432 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2433 performance can be increased by more than a factor of 50. 2434 2435 Collective on Mat 2436 2437 Input Parameters: 2438 + A - the matrix 2439 . bs - size of blockk 2440 . d_nz - number of block nonzeros per block row in diagonal portion of local 2441 submatrix (same for all local rows) 2442 . d_nnz - array containing the number of block nonzeros in the various block rows 2443 of the in diagonal portion of the local (possibly different for each block 2444 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2445 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2446 submatrix (same for all local rows). 2447 - o_nnz - array containing the number of nonzeros in the various block rows of the 2448 off-diagonal portion of the local submatrix (possibly different for 2449 each block row) or PETSC_NULL. 2450 2451 If the *_nnz parameter is given then the *_nz parameter is ignored 2452 2453 Options Database Keys: 2454 . -mat_no_unroll - uses code that does not unroll the loops in the 2455 block calculations (much slower) 2456 . -mat_block_size - size of the blocks to use 2457 2458 Notes: 2459 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2460 than it must be used on all processors that share the object for that argument. 2461 2462 Storage Information: 2463 For a square global matrix we define each processor's diagonal portion 2464 to be its local rows and the corresponding columns (a square submatrix); 2465 each processor's off-diagonal portion encompasses the remainder of the 2466 local matrix (a rectangular submatrix). 2467 2468 The user can specify preallocated storage for the diagonal part of 2469 the local submatrix with either d_nz or d_nnz (not both). Set 2470 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2471 memory allocation. Likewise, specify preallocated storage for the 2472 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2473 2474 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2475 the figure below we depict these three local rows and all columns (0-11). 2476 2477 .vb 2478 0 1 2 3 4 5 6 7 8 9 10 11 2479 ------------------- 2480 row 3 | o o o d d d o o o o o o 2481 row 4 | o o o d d d o o o o o o 2482 row 5 | o o o d d d o o o o o o 2483 ------------------- 2484 .ve 2485 2486 Thus, any entries in the d locations are stored in the d (diagonal) 2487 submatrix, and any entries in the o locations are stored in the 2488 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2489 stored simply in the MATSEQBAIJ format for compressed row storage. 2490 2491 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2492 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2493 In general, for PDE problems in which most nonzeros are near the diagonal, 2494 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2495 or you will get TERRIBLE performance; see the users' manual chapter on 2496 matrices. 2497 2498 Level: intermediate 2499 2500 .keywords: matrix, block, aij, compressed row, sparse, parallel 2501 2502 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2503 @*/ 2504 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2505 { 2506 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2507 2508 PetscFunctionBegin; 2509 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2510 if (f) { 2511 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2512 } 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatCreateMPIBAIJ" 2518 /*@C 2519 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2520 (block compressed row). For good matrix assembly performance 2521 the user should preallocate the matrix storage by setting the parameters 2522 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2523 performance can be increased by more than a factor of 50. 2524 2525 Collective on MPI_Comm 2526 2527 Input Parameters: 2528 + comm - MPI communicator 2529 . bs - size of blockk 2530 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2531 This value should be the same as the local size used in creating the 2532 y vector for the matrix-vector product y = Ax. 2533 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2534 This value should be the same as the local size used in creating the 2535 x vector for the matrix-vector product y = Ax. 2536 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2537 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2538 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2539 submatrix (same for all local rows) 2540 . d_nnz - array containing the number of nonzero blocks in the various block rows 2541 of the in diagonal portion of the local (possibly different for each block 2542 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2543 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2544 submatrix (same for all local rows). 2545 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2546 off-diagonal portion of the local submatrix (possibly different for 2547 each block row) or PETSC_NULL. 2548 2549 Output Parameter: 2550 . A - the matrix 2551 2552 Options Database Keys: 2553 . -mat_no_unroll - uses code that does not unroll the loops in the 2554 block calculations (much slower) 2555 . -mat_block_size - size of the blocks to use 2556 2557 Notes: 2558 If the *_nnz parameter is given then the *_nz parameter is ignored 2559 2560 A nonzero block is any block that as 1 or more nonzeros in it 2561 2562 The user MUST specify either the local or global matrix dimensions 2563 (possibly both). 2564 2565 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2566 than it must be used on all processors that share the object for that argument. 2567 2568 Storage Information: 2569 For a square global matrix we define each processor's diagonal portion 2570 to be its local rows and the corresponding columns (a square submatrix); 2571 each processor's off-diagonal portion encompasses the remainder of the 2572 local matrix (a rectangular submatrix). 2573 2574 The user can specify preallocated storage for the diagonal part of 2575 the local submatrix with either d_nz or d_nnz (not both). Set 2576 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2577 memory allocation. Likewise, specify preallocated storage for the 2578 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2579 2580 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2581 the figure below we depict these three local rows and all columns (0-11). 2582 2583 .vb 2584 0 1 2 3 4 5 6 7 8 9 10 11 2585 ------------------- 2586 row 3 | o o o d d d o o o o o o 2587 row 4 | o o o d d d o o o o o o 2588 row 5 | o o o d d d o o o o o o 2589 ------------------- 2590 .ve 2591 2592 Thus, any entries in the d locations are stored in the d (diagonal) 2593 submatrix, and any entries in the o locations are stored in the 2594 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2595 stored simply in the MATSEQBAIJ format for compressed row storage. 2596 2597 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2598 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2599 In general, for PDE problems in which most nonzeros are near the diagonal, 2600 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2601 or you will get TERRIBLE performance; see the users' manual chapter on 2602 matrices. 2603 2604 Level: intermediate 2605 2606 .keywords: matrix, block, aij, compressed row, sparse, parallel 2607 2608 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2609 @*/ 2610 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2611 { 2612 PetscErrorCode ierr; 2613 PetscMPIInt size; 2614 2615 PetscFunctionBegin; 2616 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2617 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2618 if (size > 1) { 2619 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2620 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2621 } else { 2622 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2623 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2624 } 2625 PetscFunctionReturn(0); 2626 } 2627 2628 #undef __FUNCT__ 2629 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2630 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2631 { 2632 Mat mat; 2633 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2634 PetscErrorCode ierr; 2635 PetscInt len=0; 2636 2637 PetscFunctionBegin; 2638 *newmat = 0; 2639 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2640 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2641 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2642 2643 mat->factor = matin->factor; 2644 mat->preallocated = PETSC_TRUE; 2645 mat->assembled = PETSC_TRUE; 2646 mat->insertmode = NOT_SET_VALUES; 2647 2648 a = (Mat_MPIBAIJ*)mat->data; 2649 mat->bs = matin->bs; 2650 a->bs2 = oldmat->bs2; 2651 a->mbs = oldmat->mbs; 2652 a->nbs = oldmat->nbs; 2653 a->Mbs = oldmat->Mbs; 2654 a->Nbs = oldmat->Nbs; 2655 2656 a->rstart = oldmat->rstart; 2657 a->rend = oldmat->rend; 2658 a->cstart = oldmat->cstart; 2659 a->cend = oldmat->cend; 2660 a->size = oldmat->size; 2661 a->rank = oldmat->rank; 2662 a->donotstash = oldmat->donotstash; 2663 a->roworiented = oldmat->roworiented; 2664 a->rowindices = 0; 2665 a->rowvalues = 0; 2666 a->getrowactive = PETSC_FALSE; 2667 a->barray = 0; 2668 a->rstart_bs = oldmat->rstart_bs; 2669 a->rend_bs = oldmat->rend_bs; 2670 a->cstart_bs = oldmat->cstart_bs; 2671 a->cend_bs = oldmat->cend_bs; 2672 2673 /* hash table stuff */ 2674 a->ht = 0; 2675 a->hd = 0; 2676 a->ht_size = 0; 2677 a->ht_flag = oldmat->ht_flag; 2678 a->ht_fact = oldmat->ht_fact; 2679 a->ht_total_ct = 0; 2680 a->ht_insert_ct = 0; 2681 2682 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2683 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2684 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2685 if (oldmat->colmap) { 2686 #if defined (PETSC_USE_CTABLE) 2687 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2688 #else 2689 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2690 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2691 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2692 #endif 2693 } else a->colmap = 0; 2694 2695 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2696 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2697 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2698 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2699 } else a->garray = 0; 2700 2701 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2702 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2703 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2704 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2705 2706 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2707 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2708 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2709 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2710 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2711 *newmat = mat; 2712 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #include "petscsys.h" 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatLoad_MPIBAIJ" 2720 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2721 { 2722 Mat A; 2723 PetscErrorCode ierr; 2724 int fd; 2725 PetscInt i,nz,j,rstart,rend; 2726 PetscScalar *vals,*buf; 2727 MPI_Comm comm = ((PetscObject)viewer)->comm; 2728 MPI_Status status; 2729 PetscMPIInt rank,size,maxnz; 2730 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2731 PetscInt *locrowlens,*procsnz = 0,*browners; 2732 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2733 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2734 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2735 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2736 2737 PetscFunctionBegin; 2738 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2739 2740 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2741 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2742 if (!rank) { 2743 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2744 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2745 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2746 } 2747 2748 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2749 M = header[1]; N = header[2]; 2750 2751 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2752 2753 /* 2754 This code adds extra rows to make sure the number of rows is 2755 divisible by the blocksize 2756 */ 2757 Mbs = M/bs; 2758 extra_rows = bs - M + bs*Mbs; 2759 if (extra_rows == bs) extra_rows = 0; 2760 else Mbs++; 2761 if (extra_rows && !rank) { 2762 ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2763 } 2764 2765 /* determine ownership of all rows */ 2766 mbs = Mbs/size + ((Mbs % size) > rank); 2767 m = mbs*bs; 2768 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2769 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2770 2771 /* process 0 needs enough room for process with most rows */ 2772 if (!rank) { 2773 mmax = rowners[1]; 2774 for (i=2; i<size; i++) { 2775 mmax = PetscMax(mmax,rowners[i]); 2776 } 2777 mmax*=bs; 2778 } else mmax = m; 2779 2780 rowners[0] = 0; 2781 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2782 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2783 rstart = rowners[rank]; 2784 rend = rowners[rank+1]; 2785 2786 /* distribute row lengths to all processors */ 2787 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2788 if (!rank) { 2789 mend = m; 2790 if (size == 1) mend = mend - extra_rows; 2791 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2792 for (j=mend; j<m; j++) locrowlens[j] = 1; 2793 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2794 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2795 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2796 for (j=0; j<m; j++) { 2797 procsnz[0] += locrowlens[j]; 2798 } 2799 for (i=1; i<size; i++) { 2800 mend = browners[i+1] - browners[i]; 2801 if (i == size-1) mend = mend - extra_rows; 2802 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2803 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2804 /* calculate the number of nonzeros on each processor */ 2805 for (j=0; j<browners[i+1]-browners[i]; j++) { 2806 procsnz[i] += rowlengths[j]; 2807 } 2808 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2809 } 2810 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2811 } else { 2812 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2813 } 2814 2815 if (!rank) { 2816 /* determine max buffer needed and allocate it */ 2817 maxnz = procsnz[0]; 2818 for (i=1; i<size; i++) { 2819 maxnz = PetscMax(maxnz,procsnz[i]); 2820 } 2821 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2822 2823 /* read in my part of the matrix column indices */ 2824 nz = procsnz[0]; 2825 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2826 mycols = ibuf; 2827 if (size == 1) nz -= extra_rows; 2828 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2829 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2830 2831 /* read in every ones (except the last) and ship off */ 2832 for (i=1; i<size-1; i++) { 2833 nz = procsnz[i]; 2834 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2835 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2836 } 2837 /* read in the stuff for the last proc */ 2838 if (size != 1) { 2839 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2840 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2841 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2842 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2843 } 2844 ierr = PetscFree(cols);CHKERRQ(ierr); 2845 } else { 2846 /* determine buffer space needed for message */ 2847 nz = 0; 2848 for (i=0; i<m; i++) { 2849 nz += locrowlens[i]; 2850 } 2851 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2852 mycols = ibuf; 2853 /* receive message of column indices*/ 2854 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2855 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2856 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2857 } 2858 2859 /* loop over local rows, determining number of off diagonal entries */ 2860 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2861 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2862 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2863 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2864 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2865 rowcount = 0; nzcount = 0; 2866 for (i=0; i<mbs; i++) { 2867 dcount = 0; 2868 odcount = 0; 2869 for (j=0; j<bs; j++) { 2870 kmax = locrowlens[rowcount]; 2871 for (k=0; k<kmax; k++) { 2872 tmp = mycols[nzcount++]/bs; 2873 if (!mask[tmp]) { 2874 mask[tmp] = 1; 2875 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2876 else masked1[dcount++] = tmp; 2877 } 2878 } 2879 rowcount++; 2880 } 2881 2882 dlens[i] = dcount; 2883 odlens[i] = odcount; 2884 2885 /* zero out the mask elements we set */ 2886 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2887 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2888 } 2889 2890 /* create our matrix */ 2891 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2892 ierr = MatSetType(A,type);CHKERRQ(ierr) 2893 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2894 2895 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2896 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2897 2898 if (!rank) { 2899 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2900 /* read in my part of the matrix numerical values */ 2901 nz = procsnz[0]; 2902 vals = buf; 2903 mycols = ibuf; 2904 if (size == 1) nz -= extra_rows; 2905 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2906 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2907 2908 /* insert into matrix */ 2909 jj = rstart*bs; 2910 for (i=0; i<m; i++) { 2911 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2912 mycols += locrowlens[i]; 2913 vals += locrowlens[i]; 2914 jj++; 2915 } 2916 /* read in other processors (except the last one) and ship out */ 2917 for (i=1; i<size-1; i++) { 2918 nz = procsnz[i]; 2919 vals = buf; 2920 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2921 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2922 } 2923 /* the last proc */ 2924 if (size != 1){ 2925 nz = procsnz[i] - extra_rows; 2926 vals = buf; 2927 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2928 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2929 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2930 } 2931 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2932 } else { 2933 /* receive numeric values */ 2934 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2935 2936 /* receive message of values*/ 2937 vals = buf; 2938 mycols = ibuf; 2939 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2940 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2941 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2942 2943 /* insert into matrix */ 2944 jj = rstart*bs; 2945 for (i=0; i<m; i++) { 2946 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2947 mycols += locrowlens[i]; 2948 vals += locrowlens[i]; 2949 jj++; 2950 } 2951 } 2952 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2953 ierr = PetscFree(buf);CHKERRQ(ierr); 2954 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2955 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2956 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2957 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2958 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 2961 *newmat = A; 2962 PetscFunctionReturn(0); 2963 } 2964 2965 #undef __FUNCT__ 2966 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2967 /*@ 2968 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2969 2970 Input Parameters: 2971 . mat - the matrix 2972 . fact - factor 2973 2974 Collective on Mat 2975 2976 Level: advanced 2977 2978 Notes: 2979 This can also be set by the command line option: -mat_use_hash_table fact 2980 2981 .keywords: matrix, hashtable, factor, HT 2982 2983 .seealso: MatSetOption() 2984 @*/ 2985 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2986 { 2987 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2988 2989 PetscFunctionBegin; 2990 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2991 if (f) { 2992 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2993 } 2994 PetscFunctionReturn(0); 2995 } 2996 2997 EXTERN_C_BEGIN 2998 #undef __FUNCT__ 2999 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3000 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3001 { 3002 Mat_MPIBAIJ *baij; 3003 3004 PetscFunctionBegin; 3005 baij = (Mat_MPIBAIJ*)mat->data; 3006 baij->ht_fact = fact; 3007 PetscFunctionReturn(0); 3008 } 3009 EXTERN_C_END 3010 3011 #undef __FUNCT__ 3012 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3013 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3014 { 3015 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3016 PetscFunctionBegin; 3017 *Ad = a->A; 3018 *Ao = a->B; 3019 *colmap = a->garray; 3020 PetscFunctionReturn(0); 3021 } 3022