1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*); 16 17 /* UGLY, ugly, ugly 18 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 19 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 20 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 21 converts the entries into single precision and then calls ..._MatScalar() to put them 22 into the single precision data structures. 23 */ 24 #if defined(PETSC_USE_MAT_SINGLE) 25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 30 #else 31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 32 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 34 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 36 #endif 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 41 { 42 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 PetscScalar *va,*vb; 46 Vec vtmp; 47 48 PetscFunctionBegin; 49 50 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 52 53 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 54 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 56 57 for (i=0; i<A->m; i++){ 58 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 59 } 60 61 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 62 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 64 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 PetscErrorCode ierr; 111 PetscInt nbs = B->nbs,i,bs=mat->bs; 112 113 PetscFunctionBegin; 114 #if defined (PETSC_USE_CTABLE) 115 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 116 for (i=0; i<nbs; i++){ 117 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 118 } 119 #else 120 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 121 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 123 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 124 #endif 125 PetscFunctionReturn(0); 126 } 127 128 #define CHUNKSIZE 10 129 130 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 131 { \ 132 \ 133 brow = row/bs; \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow]; nrow = ailen[brow]; \ 136 bcol = col/bs; \ 137 ridx = row % bs; cidx = col % bs; \ 138 low = 0; high = nrow; \ 139 while (high-low > 3) { \ 140 t = (low+high)/2; \ 141 if (rp[t] > bcol) high = t; \ 142 else low = t; \ 143 } \ 144 for (_i=low; _i<high; _i++) { \ 145 if (rp[_i] > bcol) break; \ 146 if (rp[_i] == bcol) { \ 147 bap = ap + bs2*_i + bs*cidx + ridx; \ 148 if (addv == ADD_VALUES) *bap += value; \ 149 else *bap = value; \ 150 goto a_noinsert; \ 151 } \ 152 } \ 153 if (a->nonew == 1) goto a_noinsert; \ 154 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 155 if (nrow >= rmax) { \ 156 /* there is no extra room in row, therefore enlarge */ \ 157 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 158 MatScalar *new_a; \ 159 \ 160 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 161 \ 162 /* malloc new storage space */ \ 163 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \ 164 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 165 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 166 new_i = new_j + new_nz; \ 167 \ 168 /* copy over old data into new slots */ \ 169 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 170 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 171 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 172 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 173 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 174 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 175 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 176 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 177 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 178 /* free up old matrix storage */ \ 179 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 180 if (!a->singlemalloc) { \ 181 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 182 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 183 } \ 184 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 185 a->singlemalloc = PETSC_TRUE; \ 186 \ 187 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 188 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 189 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 190 a->maxnz += bs2*CHUNKSIZE; \ 191 a->reallocs++; \ 192 a->nz++; \ 193 } \ 194 N = nrow++ - 1; \ 195 /* shift up all the later entries in this row */ \ 196 for (ii=N; ii>=_i; ii--) { \ 197 rp[ii+1] = rp[ii]; \ 198 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 199 } \ 200 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 201 rp[_i] = bcol; \ 202 ap[bs2*_i + bs*cidx + ridx] = value; \ 203 a_noinsert:; \ 204 ailen[brow] = nrow; \ 205 } 206 207 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 208 { \ 209 brow = row/bs; \ 210 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 211 rmax = bimax[brow]; nrow = bilen[brow]; \ 212 bcol = col/bs; \ 213 ridx = row % bs; cidx = col % bs; \ 214 low = 0; high = nrow; \ 215 while (high-low > 3) { \ 216 t = (low+high)/2; \ 217 if (rp[t] > bcol) high = t; \ 218 else low = t; \ 219 } \ 220 for (_i=low; _i<high; _i++) { \ 221 if (rp[_i] > bcol) break; \ 222 if (rp[_i] == bcol) { \ 223 bap = ap + bs2*_i + bs*cidx + ridx; \ 224 if (addv == ADD_VALUES) *bap += value; \ 225 else *bap = value; \ 226 goto b_noinsert; \ 227 } \ 228 } \ 229 if (b->nonew == 1) goto b_noinsert; \ 230 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 231 if (nrow >= rmax) { \ 232 /* there is no extra room in row, therefore enlarge */ \ 233 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 234 MatScalar *new_a; \ 235 \ 236 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 237 \ 238 /* malloc new storage space */ \ 239 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \ 240 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 241 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 242 new_i = new_j + new_nz; \ 243 \ 244 /* copy over old data into new slots */ \ 245 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 246 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 247 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 248 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 249 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 250 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 252 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 253 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 254 /* free up old matrix storage */ \ 255 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 256 if (!b->singlemalloc) { \ 257 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 258 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 259 } \ 260 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 261 b->singlemalloc = PETSC_TRUE; \ 262 \ 263 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 264 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 265 ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 266 b->maxnz += bs2*CHUNKSIZE; \ 267 b->reallocs++; \ 268 b->nz++; \ 269 } \ 270 N = nrow++ - 1; \ 271 /* shift up all the later entries in this row */ \ 272 for (ii=N; ii>=_i; ii--) { \ 273 rp[ii+1] = rp[ii]; \ 274 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 275 } \ 276 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 277 rp[_i] = bcol; \ 278 ap[bs2*_i + bs*cidx + ridx] = value; \ 279 b_noinsert:; \ 280 bilen[brow] = nrow; \ 281 } 282 283 #if defined(PETSC_USE_MAT_SINGLE) 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatSetValues_MPIBAIJ" 286 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 287 { 288 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 289 PetscErrorCode ierr; 290 PetscInt i,N = m*n; 291 MatScalar *vsingle; 292 293 PetscFunctionBegin; 294 if (N > b->setvalueslen) { 295 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 296 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 297 b->setvalueslen = N; 298 } 299 vsingle = b->setvaluescopy; 300 301 for (i=0; i<N; i++) { 302 vsingle[i] = v[i]; 303 } 304 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 310 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 311 { 312 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 313 PetscErrorCode ierr; 314 PetscInt i,N = m*n*b->bs2; 315 MatScalar *vsingle; 316 317 PetscFunctionBegin; 318 if (N > b->setvalueslen) { 319 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 320 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 321 b->setvalueslen = N; 322 } 323 vsingle = b->setvaluescopy; 324 for (i=0; i<N; i++) { 325 vsingle[i] = v[i]; 326 } 327 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 333 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 334 { 335 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 336 PetscErrorCode ierr; 337 PetscInt i,N = m*n; 338 MatScalar *vsingle; 339 340 PetscFunctionBegin; 341 if (N > b->setvalueslen) { 342 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 343 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 344 b->setvalueslen = N; 345 } 346 vsingle = b->setvaluescopy; 347 for (i=0; i<N; i++) { 348 vsingle[i] = v[i]; 349 } 350 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 351 PetscFunctionReturn(0); 352 } 353 354 #undef __FUNCT__ 355 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 356 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 357 { 358 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 359 PetscErrorCode ierr; 360 PetscInt i,N = m*n*b->bs2; 361 MatScalar *vsingle; 362 363 PetscFunctionBegin; 364 if (N > b->setvalueslen) { 365 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 366 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 367 b->setvalueslen = N; 368 } 369 vsingle = b->setvaluescopy; 370 for (i=0; i<N; i++) { 371 vsingle[i] = v[i]; 372 } 373 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 #endif 377 378 #undef __FUNCT__ 379 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 380 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 381 { 382 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 383 MatScalar value; 384 PetscTruth roworiented = baij->roworiented; 385 PetscErrorCode ierr; 386 PetscInt i,j,row,col; 387 PetscInt rstart_orig=baij->rstart_bs; 388 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 389 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 390 391 /* Some Variables required in the macro */ 392 Mat A = baij->A; 393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 394 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 395 MatScalar *aa=a->a; 396 397 Mat B = baij->B; 398 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 399 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 400 MatScalar *ba=b->a; 401 402 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 403 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 404 MatScalar *ap,*bap; 405 406 PetscFunctionBegin; 407 for (i=0; i<m; i++) { 408 if (im[i] < 0) continue; 409 #if defined(PETSC_USE_DEBUG) 410 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 411 #endif 412 if (im[i] >= rstart_orig && im[i] < rend_orig) { 413 row = im[i] - rstart_orig; 414 for (j=0; j<n; j++) { 415 if (in[j] >= cstart_orig && in[j] < cend_orig){ 416 col = in[j] - cstart_orig; 417 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 418 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 419 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 420 } else if (in[j] < 0) continue; 421 #if defined(PETSC_USE_DEBUG) 422 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 423 #endif 424 else { 425 if (mat->was_assembled) { 426 if (!baij->colmap) { 427 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 428 } 429 #if defined (PETSC_USE_CTABLE) 430 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 431 col = col - 1; 432 #else 433 col = baij->colmap[in[j]/bs] - 1; 434 #endif 435 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 436 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 437 col = in[j]; 438 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 439 B = baij->B; 440 b = (Mat_SeqBAIJ*)(B)->data; 441 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 442 ba=b->a; 443 } else col += in[j]%bs; 444 } else col = in[j]; 445 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 446 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 447 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 448 } 449 } 450 } else { 451 if (!baij->donotstash) { 452 if (roworiented) { 453 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 454 } else { 455 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 456 } 457 } 458 } 459 } 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 466 { 467 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 468 const MatScalar *value; 469 MatScalar *barray=baij->barray; 470 PetscTruth roworiented = baij->roworiented; 471 PetscErrorCode ierr; 472 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 473 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 474 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 475 476 PetscFunctionBegin; 477 if(!barray) { 478 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 479 baij->barray = barray; 480 } 481 482 if (roworiented) { 483 stepval = (n-1)*bs; 484 } else { 485 stepval = (m-1)*bs; 486 } 487 for (i=0; i<m; i++) { 488 if (im[i] < 0) continue; 489 #if defined(PETSC_USE_DEBUG) 490 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 491 #endif 492 if (im[i] >= rstart && im[i] < rend) { 493 row = im[i] - rstart; 494 for (j=0; j<n; j++) { 495 /* If NumCol = 1 then a copy is not required */ 496 if ((roworiented) && (n == 1)) { 497 barray = (MatScalar*)v + i*bs2; 498 } else if((!roworiented) && (m == 1)) { 499 barray = (MatScalar*)v + j*bs2; 500 } else { /* Here a copy is required */ 501 if (roworiented) { 502 value = v + i*(stepval+bs)*bs + j*bs; 503 } else { 504 value = v + j*(stepval+bs)*bs + i*bs; 505 } 506 for (ii=0; ii<bs; ii++,value+=stepval) { 507 for (jj=0; jj<bs; jj++) { 508 *barray++ = *value++; 509 } 510 } 511 barray -=bs2; 512 } 513 514 if (in[j] >= cstart && in[j] < cend){ 515 col = in[j] - cstart; 516 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 517 } 518 else if (in[j] < 0) continue; 519 #if defined(PETSC_USE_DEBUG) 520 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 521 #endif 522 else { 523 if (mat->was_assembled) { 524 if (!baij->colmap) { 525 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 526 } 527 528 #if defined(PETSC_USE_DEBUG) 529 #if defined (PETSC_USE_CTABLE) 530 { PetscInt data; 531 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 532 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 533 } 534 #else 535 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 536 #endif 537 #endif 538 #if defined (PETSC_USE_CTABLE) 539 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 540 col = (col - 1)/bs; 541 #else 542 col = (baij->colmap[in[j]] - 1)/bs; 543 #endif 544 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 545 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 546 col = in[j]; 547 } 548 } 549 else col = in[j]; 550 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 551 } 552 } 553 } else { 554 if (!baij->donotstash) { 555 if (roworiented) { 556 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 557 } else { 558 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 PetscFunctionReturn(0); 564 } 565 566 #define HASH_KEY 0.6180339887 567 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 568 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 569 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 572 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 573 { 574 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 575 PetscTruth roworiented = baij->roworiented; 576 PetscErrorCode ierr; 577 PetscInt i,j,row,col; 578 PetscInt rstart_orig=baij->rstart_bs; 579 PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 580 PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 581 PetscReal tmp; 582 MatScalar **HD = baij->hd,value; 583 #if defined(PETSC_USE_DEBUG) 584 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 585 #endif 586 587 PetscFunctionBegin; 588 589 for (i=0; i<m; i++) { 590 #if defined(PETSC_USE_DEBUG) 591 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 592 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 593 #endif 594 row = im[i]; 595 if (row >= rstart_orig && row < rend_orig) { 596 for (j=0; j<n; j++) { 597 col = in[j]; 598 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 599 /* Look up PetscInto the Hash Table */ 600 key = (row/bs)*Nbs+(col/bs)+1; 601 h1 = HASH(size,key,tmp); 602 603 604 idx = h1; 605 #if defined(PETSC_USE_DEBUG) 606 insert_ct++; 607 total_ct++; 608 if (HT[idx] != key) { 609 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 610 if (idx == size) { 611 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 612 if (idx == h1) { 613 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 614 } 615 } 616 } 617 #else 618 if (HT[idx] != key) { 619 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 620 if (idx == size) { 621 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 622 if (idx == h1) { 623 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 624 } 625 } 626 } 627 #endif 628 /* A HASH table entry is found, so insert the values at the correct address */ 629 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 630 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 631 } 632 } else { 633 if (!baij->donotstash) { 634 if (roworiented) { 635 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 636 } else { 637 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 638 } 639 } 640 } 641 } 642 #if defined(PETSC_USE_DEBUG) 643 baij->ht_total_ct = total_ct; 644 baij->ht_insert_ct = insert_ct; 645 #endif 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 651 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 652 { 653 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 654 PetscTruth roworiented = baij->roworiented; 655 PetscErrorCode ierr; 656 PetscInt i,j,ii,jj,row,col; 657 PetscInt rstart=baij->rstart ; 658 PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 659 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 660 PetscReal tmp; 661 MatScalar **HD = baij->hd,*baij_a; 662 const MatScalar *v_t,*value; 663 #if defined(PETSC_USE_DEBUG) 664 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 665 #endif 666 667 PetscFunctionBegin; 668 669 if (roworiented) { 670 stepval = (n-1)*bs; 671 } else { 672 stepval = (m-1)*bs; 673 } 674 for (i=0; i<m; i++) { 675 #if defined(PETSC_USE_DEBUG) 676 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 677 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 678 #endif 679 row = im[i]; 680 v_t = v + i*bs2; 681 if (row >= rstart && row < rend) { 682 for (j=0; j<n; j++) { 683 col = in[j]; 684 685 /* Look up into the Hash Table */ 686 key = row*Nbs+col+1; 687 h1 = HASH(size,key,tmp); 688 689 idx = h1; 690 #if defined(PETSC_USE_DEBUG) 691 total_ct++; 692 insert_ct++; 693 if (HT[idx] != key) { 694 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 695 if (idx == size) { 696 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 697 if (idx == h1) { 698 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 699 } 700 } 701 } 702 #else 703 if (HT[idx] != key) { 704 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 705 if (idx == size) { 706 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 707 if (idx == h1) { 708 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 709 } 710 } 711 } 712 #endif 713 baij_a = HD[idx]; 714 if (roworiented) { 715 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 716 /* value = v + (i*(stepval+bs)+j)*bs; */ 717 value = v_t; 718 v_t += bs; 719 if (addv == ADD_VALUES) { 720 for (ii=0; ii<bs; ii++,value+=stepval) { 721 for (jj=ii; jj<bs2; jj+=bs) { 722 baij_a[jj] += *value++; 723 } 724 } 725 } else { 726 for (ii=0; ii<bs; ii++,value+=stepval) { 727 for (jj=ii; jj<bs2; jj+=bs) { 728 baij_a[jj] = *value++; 729 } 730 } 731 } 732 } else { 733 value = v + j*(stepval+bs)*bs + i*bs; 734 if (addv == ADD_VALUES) { 735 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 736 for (jj=0; jj<bs; jj++) { 737 baij_a[jj] += *value++; 738 } 739 } 740 } else { 741 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 742 for (jj=0; jj<bs; jj++) { 743 baij_a[jj] = *value++; 744 } 745 } 746 } 747 } 748 } 749 } else { 750 if (!baij->donotstash) { 751 if (roworiented) { 752 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 753 } else { 754 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 755 } 756 } 757 } 758 } 759 #if defined(PETSC_USE_DEBUG) 760 baij->ht_total_ct = total_ct; 761 baij->ht_insert_ct = insert_ct; 762 #endif 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNCT__ 767 #define __FUNCT__ "MatGetValues_MPIBAIJ" 768 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 769 { 770 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 771 PetscErrorCode ierr; 772 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 773 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 774 775 PetscFunctionBegin; 776 for (i=0; i<m; i++) { 777 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 778 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 779 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 780 row = idxm[i] - bsrstart; 781 for (j=0; j<n; j++) { 782 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 783 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 784 if (idxn[j] >= bscstart && idxn[j] < bscend){ 785 col = idxn[j] - bscstart; 786 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 787 } else { 788 if (!baij->colmap) { 789 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 790 } 791 #if defined (PETSC_USE_CTABLE) 792 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 793 data --; 794 #else 795 data = baij->colmap[idxn[j]/bs]-1; 796 #endif 797 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 798 else { 799 col = data + idxn[j]%bs; 800 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 801 } 802 } 803 } 804 } else { 805 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 806 } 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatNorm_MPIBAIJ" 813 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 814 { 815 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 816 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 817 PetscErrorCode ierr; 818 PetscInt i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col; 819 PetscReal sum = 0.0; 820 MatScalar *v; 821 822 PetscFunctionBegin; 823 if (baij->size == 1) { 824 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 825 } else { 826 if (type == NORM_FROBENIUS) { 827 v = amat->a; 828 nz = amat->nz*bs2; 829 for (i=0; i<nz; i++) { 830 #if defined(PETSC_USE_COMPLEX) 831 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 832 #else 833 sum += (*v)*(*v); v++; 834 #endif 835 } 836 v = bmat->a; 837 nz = bmat->nz*bs2; 838 for (i=0; i<nz; i++) { 839 #if defined(PETSC_USE_COMPLEX) 840 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 841 #else 842 sum += (*v)*(*v); v++; 843 #endif 844 } 845 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 846 *nrm = sqrt(*nrm); 847 } else if (type == NORM_1) { /* max column sum */ 848 PetscReal *tmp,*tmp2; 849 PetscInt *jj,*garray=baij->garray,cstart=baij->cstart; 850 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 851 tmp2 = tmp + mat->N; 852 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 853 v = amat->a; jj = amat->j; 854 for (i=0; i<amat->nz; i++) { 855 for (j=0; j<bs; j++){ 856 col = bs*(cstart + *jj) + j; /* column index */ 857 for (row=0; row<bs; row++){ 858 tmp[col] += PetscAbsScalar(*v); v++; 859 } 860 } 861 jj++; 862 } 863 v = bmat->a; jj = bmat->j; 864 for (i=0; i<bmat->nz; i++) { 865 for (j=0; j<bs; j++){ 866 col = bs*garray[*jj] + j; 867 for (row=0; row<bs; row++){ 868 tmp[col] += PetscAbsScalar(*v); v++; 869 } 870 } 871 jj++; 872 } 873 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 874 *nrm = 0.0; 875 for (j=0; j<mat->N; j++) { 876 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 877 } 878 ierr = PetscFree(tmp);CHKERRQ(ierr); 879 } else if (type == NORM_INFINITY) { /* max row sum */ 880 PetscReal *sums; 881 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 882 sum = 0.0; 883 for (j=0; j<amat->mbs; j++) { 884 for (row=0; row<bs; row++) sums[row] = 0.0; 885 v = amat->a + bs2*amat->i[j]; 886 nz = amat->i[j+1]-amat->i[j]; 887 for (i=0; i<nz; i++) { 888 for (col=0; col<bs; col++){ 889 for (row=0; row<bs; row++){ 890 sums[row] += PetscAbsScalar(*v); v++; 891 } 892 } 893 } 894 v = bmat->a + bs2*bmat->i[j]; 895 nz = bmat->i[j+1]-bmat->i[j]; 896 for (i=0; i<nz; i++) { 897 for (col=0; col<bs; col++){ 898 for (row=0; row<bs; row++){ 899 sums[row] += PetscAbsScalar(*v); v++; 900 } 901 } 902 } 903 for (row=0; row<bs; row++){ 904 if (sums[row] > sum) sum = sums[row]; 905 } 906 } 907 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 908 ierr = PetscFree(sums);CHKERRQ(ierr); 909 } else { 910 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 911 } 912 } 913 PetscFunctionReturn(0); 914 } 915 916 /* 917 Creates the hash table, and sets the table 918 This table is created only once. 919 If new entried need to be added to the matrix 920 then the hash table has to be destroyed and 921 recreated. 922 */ 923 #undef __FUNCT__ 924 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 925 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 926 { 927 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 928 Mat A = baij->A,B=baij->B; 929 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 930 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 931 PetscErrorCode ierr; 932 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 933 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 934 PetscInt *HT,key; 935 MatScalar **HD; 936 PetscReal tmp; 937 #if defined(PETSC_USE_DEBUG) 938 PetscInt ct=0,max=0; 939 #endif 940 941 PetscFunctionBegin; 942 baij->ht_size=(PetscInt)(factor*nz); 943 size = baij->ht_size; 944 945 if (baij->ht) { 946 PetscFunctionReturn(0); 947 } 948 949 /* Allocate Memory for Hash Table */ 950 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 951 baij->ht = (PetscInt*)(baij->hd + size); 952 HD = baij->hd; 953 HT = baij->ht; 954 955 956 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 957 958 959 /* Loop Over A */ 960 for (i=0; i<a->mbs; i++) { 961 for (j=ai[i]; j<ai[i+1]; j++) { 962 row = i+rstart; 963 col = aj[j]+cstart; 964 965 key = row*Nbs + col + 1; 966 h1 = HASH(size,key,tmp); 967 for (k=0; k<size; k++){ 968 if (!HT[(h1+k)%size]) { 969 HT[(h1+k)%size] = key; 970 HD[(h1+k)%size] = a->a + j*bs2; 971 break; 972 #if defined(PETSC_USE_DEBUG) 973 } else { 974 ct++; 975 #endif 976 } 977 } 978 #if defined(PETSC_USE_DEBUG) 979 if (k> max) max = k; 980 #endif 981 } 982 } 983 /* Loop Over B */ 984 for (i=0; i<b->mbs; i++) { 985 for (j=bi[i]; j<bi[i+1]; j++) { 986 row = i+rstart; 987 col = garray[bj[j]]; 988 key = row*Nbs + col + 1; 989 h1 = HASH(size,key,tmp); 990 for (k=0; k<size; k++){ 991 if (!HT[(h1+k)%size]) { 992 HT[(h1+k)%size] = key; 993 HD[(h1+k)%size] = b->a + j*bs2; 994 break; 995 #if defined(PETSC_USE_DEBUG) 996 } else { 997 ct++; 998 #endif 999 } 1000 } 1001 #if defined(PETSC_USE_DEBUG) 1002 if (k> max) max = k; 1003 #endif 1004 } 1005 } 1006 1007 /* Print Summary */ 1008 #if defined(PETSC_USE_DEBUG) 1009 for (i=0,j=0; i<size; i++) { 1010 if (HT[i]) {j++;} 1011 } 1012 ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr); 1013 #endif 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 1019 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 1020 { 1021 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1022 PetscErrorCode ierr; 1023 PetscInt nstash,reallocs; 1024 InsertMode addv; 1025 1026 PetscFunctionBegin; 1027 if (baij->donotstash) { 1028 PetscFunctionReturn(0); 1029 } 1030 1031 /* make sure all processors are either in INSERTMODE or ADDMODE */ 1032 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 1033 if (addv == (ADD_VALUES|INSERT_VALUES)) { 1034 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 1035 } 1036 mat->insertmode = addv; /* in case this processor had no cache */ 1037 1038 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 1039 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 1040 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 1041 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 1042 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 1043 ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 1044 PetscFunctionReturn(0); 1045 } 1046 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 1049 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 1050 { 1051 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 1052 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 1053 PetscErrorCode ierr; 1054 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 1055 PetscInt *row,*col,other_disassembled; 1056 PetscTruth r1,r2,r3; 1057 MatScalar *val; 1058 InsertMode addv = mat->insertmode; 1059 PetscMPIInt n; 1060 1061 PetscFunctionBegin; 1062 if (!baij->donotstash) { 1063 while (1) { 1064 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1065 if (!flg) break; 1066 1067 for (i=0; i<n;) { 1068 /* Now identify the consecutive vals belonging to the same row */ 1069 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1070 if (j < n) ncols = j-i; 1071 else ncols = n-i; 1072 /* Now assemble all these values with a single function call */ 1073 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1074 i = j; 1075 } 1076 } 1077 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1078 /* Now process the block-stash. Since the values are stashed column-oriented, 1079 set the roworiented flag to column oriented, and after MatSetValues() 1080 restore the original flags */ 1081 r1 = baij->roworiented; 1082 r2 = a->roworiented; 1083 r3 = b->roworiented; 1084 baij->roworiented = PETSC_FALSE; 1085 a->roworiented = PETSC_FALSE; 1086 b->roworiented = PETSC_FALSE; 1087 while (1) { 1088 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1089 if (!flg) break; 1090 1091 for (i=0; i<n;) { 1092 /* Now identify the consecutive vals belonging to the same row */ 1093 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1094 if (j < n) ncols = j-i; 1095 else ncols = n-i; 1096 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1097 i = j; 1098 } 1099 } 1100 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1101 baij->roworiented = r1; 1102 a->roworiented = r2; 1103 b->roworiented = r3; 1104 } 1105 1106 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1107 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1108 1109 /* determine if any processor has disassembled, if so we must 1110 also disassemble ourselfs, in order that we may reassemble. */ 1111 /* 1112 if nonzero structure of submatrix B cannot change then we know that 1113 no processor disassembled thus we can skip this stuff 1114 */ 1115 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1116 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1117 if (mat->was_assembled && !other_disassembled) { 1118 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1119 } 1120 } 1121 1122 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1123 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1124 } 1125 b->compressedrow.use = PETSC_TRUE; 1126 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1127 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1128 1129 #if defined(PETSC_USE_DEBUG) 1130 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1131 ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr); 1132 baij->ht_total_ct = 0; 1133 baij->ht_insert_ct = 0; 1134 } 1135 #endif 1136 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1137 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1138 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1139 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1140 } 1141 1142 if (baij->rowvalues) { 1143 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1144 baij->rowvalues = 0; 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1151 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1152 { 1153 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1154 PetscErrorCode ierr; 1155 PetscMPIInt size = baij->size,rank = baij->rank; 1156 PetscInt bs = mat->bs; 1157 PetscTruth iascii,isdraw; 1158 PetscViewer sviewer; 1159 PetscViewerFormat format; 1160 1161 PetscFunctionBegin; 1162 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1163 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1164 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1165 if (iascii) { 1166 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1167 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1168 MatInfo info; 1169 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1170 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1171 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1172 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1173 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1174 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1175 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1176 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1177 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1178 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1179 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1182 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1183 PetscFunctionReturn(0); 1184 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1185 PetscFunctionReturn(0); 1186 } 1187 } 1188 1189 if (isdraw) { 1190 PetscDraw draw; 1191 PetscTruth isnull; 1192 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1193 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1194 } 1195 1196 if (size == 1) { 1197 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1198 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1199 } else { 1200 /* assemble the entire matrix onto first processor. */ 1201 Mat A; 1202 Mat_SeqBAIJ *Aloc; 1203 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1204 MatScalar *a; 1205 1206 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1207 /* Perhaps this should be the type of mat? */ 1208 if (!rank) { 1209 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1210 } else { 1211 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1212 } 1213 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1214 ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1215 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1216 1217 /* copy over the A part */ 1218 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1219 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1220 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1221 1222 for (i=0; i<mbs; i++) { 1223 rvals[0] = bs*(baij->rstart + i); 1224 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1225 for (j=ai[i]; j<ai[i+1]; j++) { 1226 col = (baij->cstart+aj[j])*bs; 1227 for (k=0; k<bs; k++) { 1228 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1229 col++; a += bs; 1230 } 1231 } 1232 } 1233 /* copy over the B part */ 1234 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1235 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1236 for (i=0; i<mbs; i++) { 1237 rvals[0] = bs*(baij->rstart + i); 1238 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1239 for (j=ai[i]; j<ai[i+1]; j++) { 1240 col = baij->garray[aj[j]]*bs; 1241 for (k=0; k<bs; k++) { 1242 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1243 col++; a += bs; 1244 } 1245 } 1246 } 1247 ierr = PetscFree(rvals);CHKERRQ(ierr); 1248 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1249 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1250 /* 1251 Everyone has to call to draw the matrix since the graphics waits are 1252 synchronized across all processors that share the PetscDraw object 1253 */ 1254 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1255 if (!rank) { 1256 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1257 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1258 } 1259 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1260 ierr = MatDestroy(A);CHKERRQ(ierr); 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatView_MPIBAIJ" 1267 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1268 { 1269 PetscErrorCode ierr; 1270 PetscTruth iascii,isdraw,issocket,isbinary; 1271 1272 PetscFunctionBegin; 1273 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1274 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1275 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1276 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1277 if (iascii || isdraw || issocket || isbinary) { 1278 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1279 } else { 1280 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1281 } 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1287 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1288 { 1289 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1290 PetscErrorCode ierr; 1291 1292 PetscFunctionBegin; 1293 #if defined(PETSC_USE_LOG) 1294 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1295 #endif 1296 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1297 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1298 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1299 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1300 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1301 #if defined (PETSC_USE_CTABLE) 1302 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1303 #else 1304 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1305 #endif 1306 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1307 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1308 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1309 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1310 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1311 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1312 #if defined(PETSC_USE_MAT_SINGLE) 1313 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1314 #endif 1315 ierr = PetscFree(baij);CHKERRQ(ierr); 1316 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1318 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1319 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1320 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1321 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1322 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1323 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatMult_MPIBAIJ" 1329 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1330 { 1331 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1332 PetscErrorCode ierr; 1333 PetscInt nt; 1334 1335 PetscFunctionBegin; 1336 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1337 if (nt != A->n) { 1338 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1339 } 1340 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1341 if (nt != A->m) { 1342 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1343 } 1344 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1345 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1346 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1347 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1348 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1354 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1355 { 1356 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1361 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1362 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1363 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1369 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1370 { 1371 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1372 PetscErrorCode ierr; 1373 PetscTruth merged; 1374 1375 PetscFunctionBegin; 1376 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1377 /* do nondiagonal part */ 1378 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1379 if (!merged) { 1380 /* send it on its way */ 1381 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1382 /* do local part */ 1383 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1384 /* receive remote parts: note this assumes the values are not actually */ 1385 /* inserted in yy until the next line */ 1386 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1387 } else { 1388 /* do local part */ 1389 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1390 /* send it on its way */ 1391 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1392 /* values actually were received in the Begin() but we need to call this nop */ 1393 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1394 } 1395 PetscFunctionReturn(0); 1396 } 1397 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1400 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1401 { 1402 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1403 PetscErrorCode ierr; 1404 1405 PetscFunctionBegin; 1406 /* do nondiagonal part */ 1407 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1408 /* send it on its way */ 1409 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1410 /* do local part */ 1411 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1412 /* receive remote parts: note this assumes the values are not actually */ 1413 /* inserted in yy until the next line, which is true for my implementation*/ 1414 /* but is not perhaps always true. */ 1415 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 /* 1420 This only works correctly for square matrices where the subblock A->A is the 1421 diagonal block 1422 */ 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1425 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1426 { 1427 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1428 PetscErrorCode ierr; 1429 1430 PetscFunctionBegin; 1431 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1432 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatScale_MPIBAIJ" 1438 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1439 { 1440 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1441 PetscErrorCode ierr; 1442 1443 PetscFunctionBegin; 1444 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1445 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1451 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1452 { 1453 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1454 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1455 PetscErrorCode ierr; 1456 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1457 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1458 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1459 1460 PetscFunctionBegin; 1461 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1462 mat->getrowactive = PETSC_TRUE; 1463 1464 if (!mat->rowvalues && (idx || v)) { 1465 /* 1466 allocate enough space to hold information from the longest row. 1467 */ 1468 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1469 PetscInt max = 1,mbs = mat->mbs,tmp; 1470 for (i=0; i<mbs; i++) { 1471 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1472 if (max < tmp) { max = tmp; } 1473 } 1474 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1475 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1476 } 1477 1478 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1479 lrow = row - brstart; 1480 1481 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1482 if (!v) {pvA = 0; pvB = 0;} 1483 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1484 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1485 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1486 nztot = nzA + nzB; 1487 1488 cmap = mat->garray; 1489 if (v || idx) { 1490 if (nztot) { 1491 /* Sort by increasing column numbers, assuming A and B already sorted */ 1492 PetscInt imark = -1; 1493 if (v) { 1494 *v = v_p = mat->rowvalues; 1495 for (i=0; i<nzB; i++) { 1496 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1497 else break; 1498 } 1499 imark = i; 1500 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1501 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1502 } 1503 if (idx) { 1504 *idx = idx_p = mat->rowindices; 1505 if (imark > -1) { 1506 for (i=0; i<imark; i++) { 1507 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1508 } 1509 } else { 1510 for (i=0; i<nzB; i++) { 1511 if (cmap[cworkB[i]/bs] < cstart) 1512 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1513 else break; 1514 } 1515 imark = i; 1516 } 1517 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1518 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1519 } 1520 } else { 1521 if (idx) *idx = 0; 1522 if (v) *v = 0; 1523 } 1524 } 1525 *nz = nztot; 1526 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1527 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1533 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1534 { 1535 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1536 1537 PetscFunctionBegin; 1538 if (!baij->getrowactive) { 1539 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1540 } 1541 baij->getrowactive = PETSC_FALSE; 1542 PetscFunctionReturn(0); 1543 } 1544 1545 #undef __FUNCT__ 1546 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1547 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1548 { 1549 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1554 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1560 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1561 { 1562 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1563 Mat A = a->A,B = a->B; 1564 PetscErrorCode ierr; 1565 PetscReal isend[5],irecv[5]; 1566 1567 PetscFunctionBegin; 1568 info->block_size = (PetscReal)matin->bs; 1569 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1570 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1571 isend[3] = info->memory; isend[4] = info->mallocs; 1572 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1573 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1574 isend[3] += info->memory; isend[4] += info->mallocs; 1575 if (flag == MAT_LOCAL) { 1576 info->nz_used = isend[0]; 1577 info->nz_allocated = isend[1]; 1578 info->nz_unneeded = isend[2]; 1579 info->memory = isend[3]; 1580 info->mallocs = isend[4]; 1581 } else if (flag == MAT_GLOBAL_MAX) { 1582 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1583 info->nz_used = irecv[0]; 1584 info->nz_allocated = irecv[1]; 1585 info->nz_unneeded = irecv[2]; 1586 info->memory = irecv[3]; 1587 info->mallocs = irecv[4]; 1588 } else if (flag == MAT_GLOBAL_SUM) { 1589 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1590 info->nz_used = irecv[0]; 1591 info->nz_allocated = irecv[1]; 1592 info->nz_unneeded = irecv[2]; 1593 info->memory = irecv[3]; 1594 info->mallocs = irecv[4]; 1595 } else { 1596 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1597 } 1598 info->rows_global = (PetscReal)A->M; 1599 info->columns_global = (PetscReal)A->N; 1600 info->rows_local = (PetscReal)A->m; 1601 info->columns_local = (PetscReal)A->N; 1602 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1603 info->fill_ratio_needed = 0; 1604 info->factor_mallocs = 0; 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1610 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1611 { 1612 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1613 PetscErrorCode ierr; 1614 1615 PetscFunctionBegin; 1616 switch (op) { 1617 case MAT_NO_NEW_NONZERO_LOCATIONS: 1618 case MAT_YES_NEW_NONZERO_LOCATIONS: 1619 case MAT_COLUMNS_UNSORTED: 1620 case MAT_COLUMNS_SORTED: 1621 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1622 case MAT_KEEP_ZEROED_ROWS: 1623 case MAT_NEW_NONZERO_LOCATION_ERR: 1624 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1625 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1626 break; 1627 case MAT_ROW_ORIENTED: 1628 a->roworiented = PETSC_TRUE; 1629 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1630 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1631 break; 1632 case MAT_ROWS_SORTED: 1633 case MAT_ROWS_UNSORTED: 1634 case MAT_YES_NEW_DIAGONALS: 1635 ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1636 break; 1637 case MAT_COLUMN_ORIENTED: 1638 a->roworiented = PETSC_FALSE; 1639 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1640 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1641 break; 1642 case MAT_IGNORE_OFF_PROC_ENTRIES: 1643 a->donotstash = PETSC_TRUE; 1644 break; 1645 case MAT_NO_NEW_DIAGONALS: 1646 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1647 case MAT_USE_HASH_TABLE: 1648 a->ht_flag = PETSC_TRUE; 1649 break; 1650 case MAT_SYMMETRIC: 1651 case MAT_STRUCTURALLY_SYMMETRIC: 1652 case MAT_HERMITIAN: 1653 case MAT_SYMMETRY_ETERNAL: 1654 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1655 break; 1656 case MAT_NOT_SYMMETRIC: 1657 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1658 case MAT_NOT_HERMITIAN: 1659 case MAT_NOT_SYMMETRY_ETERNAL: 1660 break; 1661 default: 1662 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1663 } 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1669 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1670 { 1671 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1672 Mat_SeqBAIJ *Aloc; 1673 Mat B; 1674 PetscErrorCode ierr; 1675 PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1676 PetscInt bs=A->bs,mbs=baij->mbs; 1677 MatScalar *a; 1678 1679 PetscFunctionBegin; 1680 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1681 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1682 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1683 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1684 1685 /* copy over the A part */ 1686 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1687 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1688 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1689 1690 for (i=0; i<mbs; i++) { 1691 rvals[0] = bs*(baij->rstart + i); 1692 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1693 for (j=ai[i]; j<ai[i+1]; j++) { 1694 col = (baij->cstart+aj[j])*bs; 1695 for (k=0; k<bs; k++) { 1696 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1697 col++; a += bs; 1698 } 1699 } 1700 } 1701 /* copy over the B part */ 1702 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1703 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1704 for (i=0; i<mbs; i++) { 1705 rvals[0] = bs*(baij->rstart + i); 1706 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1707 for (j=ai[i]; j<ai[i+1]; j++) { 1708 col = baij->garray[aj[j]]*bs; 1709 for (k=0; k<bs; k++) { 1710 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1711 col++; a += bs; 1712 } 1713 } 1714 } 1715 ierr = PetscFree(rvals);CHKERRQ(ierr); 1716 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1717 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1718 1719 if (matout) { 1720 *matout = B; 1721 } else { 1722 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1729 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1730 { 1731 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1732 Mat a = baij->A,b = baij->B; 1733 PetscErrorCode ierr; 1734 PetscInt s1,s2,s3; 1735 1736 PetscFunctionBegin; 1737 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1738 if (rr) { 1739 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1740 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1741 /* Overlap communication with computation. */ 1742 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1743 } 1744 if (ll) { 1745 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1746 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1747 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1748 } 1749 /* scale the diagonal block */ 1750 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1751 1752 if (rr) { 1753 /* Do a scatter end and then right scale the off-diagonal block */ 1754 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1755 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1756 } 1757 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1763 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1764 { 1765 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1766 PetscErrorCode ierr; 1767 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1768 PetscInt i,N,*rows,*owners = l->rowners; 1769 PetscInt *nprocs,j,idx,nsends,row; 1770 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1771 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1772 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1773 MPI_Comm comm = A->comm; 1774 MPI_Request *send_waits,*recv_waits; 1775 MPI_Status recv_status,*send_status; 1776 IS istmp; 1777 #if defined(PETSC_DEBUG) 1778 PetscTruth found = PETSC_FALSE; 1779 #endif 1780 1781 PetscFunctionBegin; 1782 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1783 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1784 1785 /* first count number of contributors to each processor */ 1786 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1787 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1788 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1789 j = 0; 1790 for (i=0; i<N; i++) { 1791 if (lastidx > (idx = rows[i])) j = 0; 1792 lastidx = idx; 1793 for (; j<size; j++) { 1794 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1795 nprocs[2*j]++; 1796 nprocs[2*j+1] = 1; 1797 owner[i] = j; 1798 #if defined(PETSC_DEBUG) 1799 found = PETSC_TRUE; 1800 #endif 1801 break; 1802 } 1803 } 1804 #if defined(PETSC_DEBUG) 1805 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1806 found = PETSC_FALSE; 1807 #endif 1808 } 1809 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1810 1811 /* inform other processors of number of messages and max length*/ 1812 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1813 1814 /* post receives: */ 1815 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1816 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1817 for (i=0; i<nrecvs; i++) { 1818 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1819 } 1820 1821 /* do sends: 1822 1) starts[i] gives the starting index in svalues for stuff going to 1823 the ith processor 1824 */ 1825 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1826 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1827 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1828 starts[0] = 0; 1829 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1830 for (i=0; i<N; i++) { 1831 svalues[starts[owner[i]]++] = rows[i]; 1832 } 1833 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1834 1835 starts[0] = 0; 1836 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1837 count = 0; 1838 for (i=0; i<size; i++) { 1839 if (nprocs[2*i+1]) { 1840 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1841 } 1842 } 1843 ierr = PetscFree(starts);CHKERRQ(ierr); 1844 1845 base = owners[rank]*bs; 1846 1847 /* wait on receives */ 1848 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1849 source = lens + nrecvs; 1850 count = nrecvs; slen = 0; 1851 while (count) { 1852 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1853 /* unpack receives into our local space */ 1854 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1855 source[imdex] = recv_status.MPI_SOURCE; 1856 lens[imdex] = n; 1857 slen += n; 1858 count--; 1859 } 1860 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1861 1862 /* move the data into the send scatter */ 1863 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1864 count = 0; 1865 for (i=0; i<nrecvs; i++) { 1866 values = rvalues + i*nmax; 1867 for (j=0; j<lens[i]; j++) { 1868 lrows[count++] = values[j] - base; 1869 } 1870 } 1871 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1872 ierr = PetscFree(lens);CHKERRQ(ierr); 1873 ierr = PetscFree(owner);CHKERRQ(ierr); 1874 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1875 1876 /* actually zap the local rows */ 1877 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1878 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 1879 1880 /* 1881 Zero the required rows. If the "diagonal block" of the matrix 1882 is square and the user wishes to set the diagonal we use seperate 1883 code so that MatSetValues() is not called for each diagonal allocating 1884 new memory, thus calling lots of mallocs and slowing things down. 1885 1886 Contributed by: Mathew Knepley 1887 */ 1888 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1889 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1890 if (diag && (l->A->M == l->A->N)) { 1891 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1892 } else if (diag) { 1893 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1894 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1895 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1896 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1897 } 1898 for (i=0; i<slen; i++) { 1899 row = lrows[i] + rstart_bs; 1900 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1901 } 1902 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1903 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1904 } else { 1905 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1906 } 1907 1908 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1909 ierr = PetscFree(lrows);CHKERRQ(ierr); 1910 1911 /* wait on sends */ 1912 if (nsends) { 1913 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1914 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1915 ierr = PetscFree(send_status);CHKERRQ(ierr); 1916 } 1917 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1918 ierr = PetscFree(svalues);CHKERRQ(ierr); 1919 1920 PetscFunctionReturn(0); 1921 } 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1925 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1926 { 1927 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1928 MPI_Comm comm = A->comm; 1929 static PetscTruth called = PETSC_FALSE; 1930 PetscErrorCode ierr; 1931 1932 PetscFunctionBegin; 1933 if (!a->rank) { 1934 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1935 } 1936 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1937 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1938 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1944 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1945 { 1946 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1947 PetscErrorCode ierr; 1948 1949 PetscFunctionBegin; 1950 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1951 PetscFunctionReturn(0); 1952 } 1953 1954 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatEqual_MPIBAIJ" 1958 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1959 { 1960 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1961 Mat a,b,c,d; 1962 PetscTruth flg; 1963 PetscErrorCode ierr; 1964 1965 PetscFunctionBegin; 1966 a = matA->A; b = matA->B; 1967 c = matB->A; d = matB->B; 1968 1969 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1970 if (flg) { 1971 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1972 } 1973 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1974 PetscFunctionReturn(0); 1975 } 1976 1977 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1980 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1981 { 1982 PetscErrorCode ierr; 1983 1984 PetscFunctionBegin; 1985 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1986 PetscFunctionReturn(0); 1987 } 1988 1989 /* -------------------------------------------------------------------*/ 1990 static struct _MatOps MatOps_Values = { 1991 MatSetValues_MPIBAIJ, 1992 MatGetRow_MPIBAIJ, 1993 MatRestoreRow_MPIBAIJ, 1994 MatMult_MPIBAIJ, 1995 /* 4*/ MatMultAdd_MPIBAIJ, 1996 MatMultTranspose_MPIBAIJ, 1997 MatMultTransposeAdd_MPIBAIJ, 1998 0, 1999 0, 2000 0, 2001 /*10*/ 0, 2002 0, 2003 0, 2004 0, 2005 MatTranspose_MPIBAIJ, 2006 /*15*/ MatGetInfo_MPIBAIJ, 2007 MatEqual_MPIBAIJ, 2008 MatGetDiagonal_MPIBAIJ, 2009 MatDiagonalScale_MPIBAIJ, 2010 MatNorm_MPIBAIJ, 2011 /*20*/ MatAssemblyBegin_MPIBAIJ, 2012 MatAssemblyEnd_MPIBAIJ, 2013 0, 2014 MatSetOption_MPIBAIJ, 2015 MatZeroEntries_MPIBAIJ, 2016 /*25*/ MatZeroRows_MPIBAIJ, 2017 0, 2018 0, 2019 0, 2020 0, 2021 /*30*/ MatSetUpPreallocation_MPIBAIJ, 2022 0, 2023 0, 2024 0, 2025 0, 2026 /*35*/ MatDuplicate_MPIBAIJ, 2027 0, 2028 0, 2029 0, 2030 0, 2031 /*40*/ 0, 2032 MatGetSubMatrices_MPIBAIJ, 2033 MatIncreaseOverlap_MPIBAIJ, 2034 MatGetValues_MPIBAIJ, 2035 0, 2036 /*45*/ MatPrintHelp_MPIBAIJ, 2037 MatScale_MPIBAIJ, 2038 0, 2039 0, 2040 0, 2041 /*50*/ 0, 2042 0, 2043 0, 2044 0, 2045 0, 2046 /*55*/ 0, 2047 0, 2048 MatSetUnfactored_MPIBAIJ, 2049 0, 2050 MatSetValuesBlocked_MPIBAIJ, 2051 /*60*/ 0, 2052 MatDestroy_MPIBAIJ, 2053 MatView_MPIBAIJ, 2054 MatGetPetscMaps_Petsc, 2055 0, 2056 /*65*/ 0, 2057 0, 2058 0, 2059 0, 2060 0, 2061 /*70*/ MatGetRowMax_MPIBAIJ, 2062 0, 2063 0, 2064 0, 2065 0, 2066 /*75*/ 0, 2067 0, 2068 0, 2069 0, 2070 0, 2071 /*80*/ 0, 2072 0, 2073 0, 2074 0, 2075 MatLoad_MPIBAIJ, 2076 /*85*/ 0, 2077 0, 2078 0, 2079 0, 2080 0, 2081 /*90*/ 0, 2082 0, 2083 0, 2084 0, 2085 0, 2086 /*95*/ 0, 2087 0, 2088 0, 2089 0}; 2090 2091 2092 EXTERN_C_BEGIN 2093 #undef __FUNCT__ 2094 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2095 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2096 { 2097 PetscFunctionBegin; 2098 *a = ((Mat_MPIBAIJ *)A->data)->A; 2099 *iscopy = PETSC_FALSE; 2100 PetscFunctionReturn(0); 2101 } 2102 EXTERN_C_END 2103 2104 EXTERN_C_BEGIN 2105 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 2106 EXTERN_C_END 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2110 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2111 { 2112 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2113 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2114 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2115 const PetscInt *JJ; 2116 PetscScalar *values; 2117 PetscErrorCode ierr; 2118 2119 PetscFunctionBegin; 2120 #if defined(PETSC_OPT_g) 2121 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2122 #endif 2123 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2124 o_nnz = d_nnz + m; 2125 2126 for (i=0; i<m; i++) { 2127 nnz = I[i+1]- I[i]; 2128 JJ = J + I[i]; 2129 nnz_max = PetscMax(nnz_max,nnz); 2130 #if defined(PETSC_OPT_g) 2131 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2132 #endif 2133 for (j=0; j<nnz; j++) { 2134 if (*JJ >= cstart) break; 2135 JJ++; 2136 } 2137 d = 0; 2138 for (; j<nnz; j++) { 2139 if (*JJ++ >= cend) break; 2140 d++; 2141 } 2142 d_nnz[i] = d; 2143 o_nnz[i] = nnz - d; 2144 } 2145 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2146 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2147 2148 if (v) values = (PetscScalar*)v; 2149 else { 2150 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2151 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2152 } 2153 2154 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2155 for (i=0; i<m; i++) { 2156 ii = i + rstart; 2157 nnz = I[i+1]- I[i]; 2158 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2159 } 2160 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2161 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2162 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2163 2164 if (!v) { 2165 ierr = PetscFree(values);CHKERRQ(ierr); 2166 } 2167 PetscFunctionReturn(0); 2168 } 2169 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2172 /*@C 2173 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2174 (the default parallel PETSc format). 2175 2176 Collective on MPI_Comm 2177 2178 Input Parameters: 2179 + A - the matrix 2180 . i - the indices into j for the start of each local row (starts with zero) 2181 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2182 - v - optional values in the matrix 2183 2184 Level: developer 2185 2186 .keywords: matrix, aij, compressed row, sparse, parallel 2187 2188 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2189 @*/ 2190 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2191 { 2192 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2193 2194 PetscFunctionBegin; 2195 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2196 if (f) { 2197 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2198 } 2199 PetscFunctionReturn(0); 2200 } 2201 2202 EXTERN_C_BEGIN 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2205 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2206 { 2207 Mat_MPIBAIJ *b; 2208 PetscErrorCode ierr; 2209 PetscInt i; 2210 2211 PetscFunctionBegin; 2212 B->preallocated = PETSC_TRUE; 2213 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2214 2215 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2216 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2217 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2218 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2219 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2220 if (d_nnz) { 2221 for (i=0; i<B->m/bs; i++) { 2222 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2223 } 2224 } 2225 if (o_nnz) { 2226 for (i=0; i<B->m/bs; i++) { 2227 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2228 } 2229 } 2230 2231 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2232 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2233 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2234 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2235 2236 b = (Mat_MPIBAIJ*)B->data; 2237 B->bs = bs; 2238 b->bs2 = bs*bs; 2239 b->mbs = B->m/bs; 2240 b->nbs = B->n/bs; 2241 b->Mbs = B->M/bs; 2242 b->Nbs = B->N/bs; 2243 2244 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2245 b->rowners[0] = 0; 2246 for (i=2; i<=b->size; i++) { 2247 b->rowners[i] += b->rowners[i-1]; 2248 } 2249 b->rstart = b->rowners[b->rank]; 2250 b->rend = b->rowners[b->rank+1]; 2251 2252 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2253 b->cowners[0] = 0; 2254 for (i=2; i<=b->size; i++) { 2255 b->cowners[i] += b->cowners[i-1]; 2256 } 2257 b->cstart = b->cowners[b->rank]; 2258 b->cend = b->cowners[b->rank+1]; 2259 2260 for (i=0; i<=b->size; i++) { 2261 b->rowners_bs[i] = b->rowners[i]*bs; 2262 } 2263 b->rstart_bs = b->rstart*bs; 2264 b->rend_bs = b->rend*bs; 2265 b->cstart_bs = b->cstart*bs; 2266 b->cend_bs = b->cend*bs; 2267 2268 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2269 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2270 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2271 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2272 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2273 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2274 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2275 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2276 2277 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2278 2279 PetscFunctionReturn(0); 2280 } 2281 EXTERN_C_END 2282 2283 EXTERN_C_BEGIN 2284 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2285 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2286 EXTERN_C_END 2287 2288 /*MC 2289 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2290 2291 Options Database Keys: 2292 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2293 2294 Level: beginner 2295 2296 .seealso: MatCreateMPIBAIJ 2297 M*/ 2298 2299 EXTERN_C_BEGIN 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatCreate_MPIBAIJ" 2302 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2303 { 2304 Mat_MPIBAIJ *b; 2305 PetscErrorCode ierr; 2306 PetscTruth flg; 2307 2308 PetscFunctionBegin; 2309 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2310 B->data = (void*)b; 2311 2312 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2313 B->mapping = 0; 2314 B->factor = 0; 2315 B->assembled = PETSC_FALSE; 2316 2317 B->insertmode = NOT_SET_VALUES; 2318 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2319 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2320 2321 /* build local table of row and column ownerships */ 2322 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2323 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2324 b->cowners = b->rowners + b->size + 2; 2325 b->rowners_bs = b->cowners + b->size + 2; 2326 2327 /* build cache for off array entries formed */ 2328 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2329 b->donotstash = PETSC_FALSE; 2330 b->colmap = PETSC_NULL; 2331 b->garray = PETSC_NULL; 2332 b->roworiented = PETSC_TRUE; 2333 2334 #if defined(PETSC_USE_MAT_SINGLE) 2335 /* stuff for MatSetValues_XXX in single precision */ 2336 b->setvalueslen = 0; 2337 b->setvaluescopy = PETSC_NULL; 2338 #endif 2339 2340 /* stuff used in block assembly */ 2341 b->barray = 0; 2342 2343 /* stuff used for matrix vector multiply */ 2344 b->lvec = 0; 2345 b->Mvctx = 0; 2346 2347 /* stuff for MatGetRow() */ 2348 b->rowindices = 0; 2349 b->rowvalues = 0; 2350 b->getrowactive = PETSC_FALSE; 2351 2352 /* hash table stuff */ 2353 b->ht = 0; 2354 b->hd = 0; 2355 b->ht_size = 0; 2356 b->ht_flag = PETSC_FALSE; 2357 b->ht_fact = 0; 2358 b->ht_total_ct = 0; 2359 b->ht_insert_ct = 0; 2360 2361 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2362 if (flg) { 2363 PetscReal fact = 1.39; 2364 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2365 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2366 if (fact <= 1.0) fact = 1.39; 2367 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2368 ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 2369 } 2370 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2371 "MatStoreValues_MPIBAIJ", 2372 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2374 "MatRetrieveValues_MPIBAIJ", 2375 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2376 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2377 "MatGetDiagonalBlock_MPIBAIJ", 2378 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2379 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2380 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2381 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2382 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2383 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2384 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2385 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2386 "MatDiagonalScaleLocal_MPIBAIJ", 2387 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2388 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2389 "MatSetHashTableFactor_MPIBAIJ", 2390 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2391 PetscFunctionReturn(0); 2392 } 2393 EXTERN_C_END 2394 2395 /*MC 2396 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2397 2398 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2399 and MATMPIBAIJ otherwise. 2400 2401 Options Database Keys: 2402 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2403 2404 Level: beginner 2405 2406 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2407 M*/ 2408 2409 EXTERN_C_BEGIN 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatCreate_BAIJ" 2412 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2413 { 2414 PetscErrorCode ierr; 2415 PetscMPIInt size; 2416 2417 PetscFunctionBegin; 2418 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2419 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2420 if (size == 1) { 2421 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2422 } else { 2423 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2424 } 2425 PetscFunctionReturn(0); 2426 } 2427 EXTERN_C_END 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2431 /*@C 2432 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2433 (block compressed row). For good matrix assembly performance 2434 the user should preallocate the matrix storage by setting the parameters 2435 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2436 performance can be increased by more than a factor of 50. 2437 2438 Collective on Mat 2439 2440 Input Parameters: 2441 + A - the matrix 2442 . bs - size of blockk 2443 . d_nz - number of block nonzeros per block row in diagonal portion of local 2444 submatrix (same for all local rows) 2445 . d_nnz - array containing the number of block nonzeros in the various block rows 2446 of the in diagonal portion of the local (possibly different for each block 2447 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2448 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2449 submatrix (same for all local rows). 2450 - o_nnz - array containing the number of nonzeros in the various block rows of the 2451 off-diagonal portion of the local submatrix (possibly different for 2452 each block row) or PETSC_NULL. 2453 2454 If the *_nnz parameter is given then the *_nz parameter is ignored 2455 2456 Options Database Keys: 2457 . -mat_no_unroll - uses code that does not unroll the loops in the 2458 block calculations (much slower) 2459 . -mat_block_size - size of the blocks to use 2460 2461 Notes: 2462 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2463 than it must be used on all processors that share the object for that argument. 2464 2465 Storage Information: 2466 For a square global matrix we define each processor's diagonal portion 2467 to be its local rows and the corresponding columns (a square submatrix); 2468 each processor's off-diagonal portion encompasses the remainder of the 2469 local matrix (a rectangular submatrix). 2470 2471 The user can specify preallocated storage for the diagonal part of 2472 the local submatrix with either d_nz or d_nnz (not both). Set 2473 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2474 memory allocation. Likewise, specify preallocated storage for the 2475 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2476 2477 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2478 the figure below we depict these three local rows and all columns (0-11). 2479 2480 .vb 2481 0 1 2 3 4 5 6 7 8 9 10 11 2482 ------------------- 2483 row 3 | o o o d d d o o o o o o 2484 row 4 | o o o d d d o o o o o o 2485 row 5 | o o o d d d o o o o o o 2486 ------------------- 2487 .ve 2488 2489 Thus, any entries in the d locations are stored in the d (diagonal) 2490 submatrix, and any entries in the o locations are stored in the 2491 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2492 stored simply in the MATSEQBAIJ format for compressed row storage. 2493 2494 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2495 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2496 In general, for PDE problems in which most nonzeros are near the diagonal, 2497 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2498 or you will get TERRIBLE performance; see the users' manual chapter on 2499 matrices. 2500 2501 Level: intermediate 2502 2503 .keywords: matrix, block, aij, compressed row, sparse, parallel 2504 2505 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2506 @*/ 2507 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2508 { 2509 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2510 2511 PetscFunctionBegin; 2512 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2513 if (f) { 2514 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2515 } 2516 PetscFunctionReturn(0); 2517 } 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatCreateMPIBAIJ" 2521 /*@C 2522 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2523 (block compressed row). For good matrix assembly performance 2524 the user should preallocate the matrix storage by setting the parameters 2525 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2526 performance can be increased by more than a factor of 50. 2527 2528 Collective on MPI_Comm 2529 2530 Input Parameters: 2531 + comm - MPI communicator 2532 . bs - size of blockk 2533 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2534 This value should be the same as the local size used in creating the 2535 y vector for the matrix-vector product y = Ax. 2536 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2537 This value should be the same as the local size used in creating the 2538 x vector for the matrix-vector product y = Ax. 2539 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2540 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2541 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2542 submatrix (same for all local rows) 2543 . d_nnz - array containing the number of nonzero blocks in the various block rows 2544 of the in diagonal portion of the local (possibly different for each block 2545 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2546 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2547 submatrix (same for all local rows). 2548 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2549 off-diagonal portion of the local submatrix (possibly different for 2550 each block row) or PETSC_NULL. 2551 2552 Output Parameter: 2553 . A - the matrix 2554 2555 Options Database Keys: 2556 . -mat_no_unroll - uses code that does not unroll the loops in the 2557 block calculations (much slower) 2558 . -mat_block_size - size of the blocks to use 2559 2560 Notes: 2561 If the *_nnz parameter is given then the *_nz parameter is ignored 2562 2563 A nonzero block is any block that as 1 or more nonzeros in it 2564 2565 The user MUST specify either the local or global matrix dimensions 2566 (possibly both). 2567 2568 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2569 than it must be used on all processors that share the object for that argument. 2570 2571 Storage Information: 2572 For a square global matrix we define each processor's diagonal portion 2573 to be its local rows and the corresponding columns (a square submatrix); 2574 each processor's off-diagonal portion encompasses the remainder of the 2575 local matrix (a rectangular submatrix). 2576 2577 The user can specify preallocated storage for the diagonal part of 2578 the local submatrix with either d_nz or d_nnz (not both). Set 2579 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2580 memory allocation. Likewise, specify preallocated storage for the 2581 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2582 2583 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2584 the figure below we depict these three local rows and all columns (0-11). 2585 2586 .vb 2587 0 1 2 3 4 5 6 7 8 9 10 11 2588 ------------------- 2589 row 3 | o o o d d d o o o o o o 2590 row 4 | o o o d d d o o o o o o 2591 row 5 | o o o d d d o o o o o o 2592 ------------------- 2593 .ve 2594 2595 Thus, any entries in the d locations are stored in the d (diagonal) 2596 submatrix, and any entries in the o locations are stored in the 2597 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2598 stored simply in the MATSEQBAIJ format for compressed row storage. 2599 2600 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2601 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2602 In general, for PDE problems in which most nonzeros are near the diagonal, 2603 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2604 or you will get TERRIBLE performance; see the users' manual chapter on 2605 matrices. 2606 2607 Level: intermediate 2608 2609 .keywords: matrix, block, aij, compressed row, sparse, parallel 2610 2611 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2612 @*/ 2613 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2614 { 2615 PetscErrorCode ierr; 2616 PetscMPIInt size; 2617 2618 PetscFunctionBegin; 2619 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2620 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2621 if (size > 1) { 2622 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2623 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2624 } else { 2625 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2626 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2627 } 2628 PetscFunctionReturn(0); 2629 } 2630 2631 #undef __FUNCT__ 2632 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2633 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2634 { 2635 Mat mat; 2636 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2637 PetscErrorCode ierr; 2638 PetscInt len=0; 2639 2640 PetscFunctionBegin; 2641 *newmat = 0; 2642 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2643 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2644 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2645 2646 mat->factor = matin->factor; 2647 mat->preallocated = PETSC_TRUE; 2648 mat->assembled = PETSC_TRUE; 2649 mat->insertmode = NOT_SET_VALUES; 2650 2651 a = (Mat_MPIBAIJ*)mat->data; 2652 mat->bs = matin->bs; 2653 a->bs2 = oldmat->bs2; 2654 a->mbs = oldmat->mbs; 2655 a->nbs = oldmat->nbs; 2656 a->Mbs = oldmat->Mbs; 2657 a->Nbs = oldmat->Nbs; 2658 2659 a->rstart = oldmat->rstart; 2660 a->rend = oldmat->rend; 2661 a->cstart = oldmat->cstart; 2662 a->cend = oldmat->cend; 2663 a->size = oldmat->size; 2664 a->rank = oldmat->rank; 2665 a->donotstash = oldmat->donotstash; 2666 a->roworiented = oldmat->roworiented; 2667 a->rowindices = 0; 2668 a->rowvalues = 0; 2669 a->getrowactive = PETSC_FALSE; 2670 a->barray = 0; 2671 a->rstart_bs = oldmat->rstart_bs; 2672 a->rend_bs = oldmat->rend_bs; 2673 a->cstart_bs = oldmat->cstart_bs; 2674 a->cend_bs = oldmat->cend_bs; 2675 2676 /* hash table stuff */ 2677 a->ht = 0; 2678 a->hd = 0; 2679 a->ht_size = 0; 2680 a->ht_flag = oldmat->ht_flag; 2681 a->ht_fact = oldmat->ht_fact; 2682 a->ht_total_ct = 0; 2683 a->ht_insert_ct = 0; 2684 2685 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2686 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2687 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2688 if (oldmat->colmap) { 2689 #if defined (PETSC_USE_CTABLE) 2690 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2691 #else 2692 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2693 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2694 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2695 #endif 2696 } else a->colmap = 0; 2697 2698 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2699 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2700 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2701 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2702 } else a->garray = 0; 2703 2704 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2705 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2706 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2707 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2708 2709 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2710 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2711 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2712 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2713 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2714 *newmat = mat; 2715 2716 PetscFunctionReturn(0); 2717 } 2718 2719 #include "petscsys.h" 2720 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "MatLoad_MPIBAIJ" 2723 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2724 { 2725 Mat A; 2726 PetscErrorCode ierr; 2727 int fd; 2728 PetscInt i,nz,j,rstart,rend; 2729 PetscScalar *vals,*buf; 2730 MPI_Comm comm = ((PetscObject)viewer)->comm; 2731 MPI_Status status; 2732 PetscMPIInt rank,size,maxnz; 2733 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2734 PetscInt *locrowlens,*procsnz = 0,*browners; 2735 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2736 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2737 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2738 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2739 2740 PetscFunctionBegin; 2741 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2742 2743 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2744 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2745 if (!rank) { 2746 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2747 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2748 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2749 } 2750 2751 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2752 M = header[1]; N = header[2]; 2753 2754 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2755 2756 /* 2757 This code adds extra rows to make sure the number of rows is 2758 divisible by the blocksize 2759 */ 2760 Mbs = M/bs; 2761 extra_rows = bs - M + bs*Mbs; 2762 if (extra_rows == bs) extra_rows = 0; 2763 else Mbs++; 2764 if (extra_rows && !rank) { 2765 ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2766 } 2767 2768 /* determine ownership of all rows */ 2769 mbs = Mbs/size + ((Mbs % size) > rank); 2770 m = mbs*bs; 2771 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2772 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2773 2774 /* process 0 needs enough room for process with most rows */ 2775 if (!rank) { 2776 mmax = rowners[1]; 2777 for (i=2; i<size; i++) { 2778 mmax = PetscMax(mmax,rowners[i]); 2779 } 2780 mmax*=bs; 2781 } else mmax = m; 2782 2783 rowners[0] = 0; 2784 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2785 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2786 rstart = rowners[rank]; 2787 rend = rowners[rank+1]; 2788 2789 /* distribute row lengths to all processors */ 2790 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2791 if (!rank) { 2792 mend = m; 2793 if (size == 1) mend = mend - extra_rows; 2794 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2795 for (j=mend; j<m; j++) locrowlens[j] = 1; 2796 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2797 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2798 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2799 for (j=0; j<m; j++) { 2800 procsnz[0] += locrowlens[j]; 2801 } 2802 for (i=1; i<size; i++) { 2803 mend = browners[i+1] - browners[i]; 2804 if (i == size-1) mend = mend - extra_rows; 2805 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2806 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2807 /* calculate the number of nonzeros on each processor */ 2808 for (j=0; j<browners[i+1]-browners[i]; j++) { 2809 procsnz[i] += rowlengths[j]; 2810 } 2811 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2812 } 2813 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2814 } else { 2815 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2816 } 2817 2818 if (!rank) { 2819 /* determine max buffer needed and allocate it */ 2820 maxnz = procsnz[0]; 2821 for (i=1; i<size; i++) { 2822 maxnz = PetscMax(maxnz,procsnz[i]); 2823 } 2824 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2825 2826 /* read in my part of the matrix column indices */ 2827 nz = procsnz[0]; 2828 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2829 mycols = ibuf; 2830 if (size == 1) nz -= extra_rows; 2831 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2832 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2833 2834 /* read in every ones (except the last) and ship off */ 2835 for (i=1; i<size-1; i++) { 2836 nz = procsnz[i]; 2837 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2838 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2839 } 2840 /* read in the stuff for the last proc */ 2841 if (size != 1) { 2842 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2843 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2844 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2845 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2846 } 2847 ierr = PetscFree(cols);CHKERRQ(ierr); 2848 } else { 2849 /* determine buffer space needed for message */ 2850 nz = 0; 2851 for (i=0; i<m; i++) { 2852 nz += locrowlens[i]; 2853 } 2854 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2855 mycols = ibuf; 2856 /* receive message of column indices*/ 2857 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2858 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2859 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2860 } 2861 2862 /* loop over local rows, determining number of off diagonal entries */ 2863 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2864 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2865 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2866 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2867 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2868 rowcount = 0; nzcount = 0; 2869 for (i=0; i<mbs; i++) { 2870 dcount = 0; 2871 odcount = 0; 2872 for (j=0; j<bs; j++) { 2873 kmax = locrowlens[rowcount]; 2874 for (k=0; k<kmax; k++) { 2875 tmp = mycols[nzcount++]/bs; 2876 if (!mask[tmp]) { 2877 mask[tmp] = 1; 2878 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2879 else masked1[dcount++] = tmp; 2880 } 2881 } 2882 rowcount++; 2883 } 2884 2885 dlens[i] = dcount; 2886 odlens[i] = odcount; 2887 2888 /* zero out the mask elements we set */ 2889 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2890 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2891 } 2892 2893 /* create our matrix */ 2894 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2895 ierr = MatSetType(A,type);CHKERRQ(ierr) 2896 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2897 2898 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2899 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2900 2901 if (!rank) { 2902 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2903 /* read in my part of the matrix numerical values */ 2904 nz = procsnz[0]; 2905 vals = buf; 2906 mycols = ibuf; 2907 if (size == 1) nz -= extra_rows; 2908 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2909 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2910 2911 /* insert into matrix */ 2912 jj = rstart*bs; 2913 for (i=0; i<m; i++) { 2914 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2915 mycols += locrowlens[i]; 2916 vals += locrowlens[i]; 2917 jj++; 2918 } 2919 /* read in other processors (except the last one) and ship out */ 2920 for (i=1; i<size-1; i++) { 2921 nz = procsnz[i]; 2922 vals = buf; 2923 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2924 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2925 } 2926 /* the last proc */ 2927 if (size != 1){ 2928 nz = procsnz[i] - extra_rows; 2929 vals = buf; 2930 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2931 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2932 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2933 } 2934 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2935 } else { 2936 /* receive numeric values */ 2937 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2938 2939 /* receive message of values*/ 2940 vals = buf; 2941 mycols = ibuf; 2942 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2943 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2944 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2945 2946 /* insert into matrix */ 2947 jj = rstart*bs; 2948 for (i=0; i<m; i++) { 2949 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2950 mycols += locrowlens[i]; 2951 vals += locrowlens[i]; 2952 jj++; 2953 } 2954 } 2955 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2956 ierr = PetscFree(buf);CHKERRQ(ierr); 2957 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2958 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2959 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2960 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2961 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2963 2964 *newmat = A; 2965 PetscFunctionReturn(0); 2966 } 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2970 /*@ 2971 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2972 2973 Input Parameters: 2974 . mat - the matrix 2975 . fact - factor 2976 2977 Collective on Mat 2978 2979 Level: advanced 2980 2981 Notes: 2982 This can also be set by the command line option: -mat_use_hash_table fact 2983 2984 .keywords: matrix, hashtable, factor, HT 2985 2986 .seealso: MatSetOption() 2987 @*/ 2988 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2989 { 2990 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2991 2992 PetscFunctionBegin; 2993 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2994 if (f) { 2995 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2996 } 2997 PetscFunctionReturn(0); 2998 } 2999 3000 EXTERN_C_BEGIN 3001 #undef __FUNCT__ 3002 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3003 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3004 { 3005 Mat_MPIBAIJ *baij; 3006 3007 PetscFunctionBegin; 3008 baij = (Mat_MPIBAIJ*)mat->data; 3009 baij->ht_fact = fact; 3010 PetscFunctionReturn(0); 3011 } 3012 EXTERN_C_END 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3016 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3017 { 3018 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3019 PetscFunctionBegin; 3020 *Ad = a->A; 3021 *Ao = a->B; 3022 *colmap = a->garray; 3023 PetscFunctionReturn(0); 3024 } 3025