1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 111 { \ 112 \ 113 brow = row/bs; \ 114 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 115 rmax = aimax[brow]; nrow = ailen[brow]; \ 116 bcol = col/bs; \ 117 ridx = row % bs; cidx = col % bs; \ 118 low = 0; high = nrow; \ 119 while (high-low > 3) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > bcol) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > bcol) break; \ 126 if (rp[_i] == bcol) { \ 127 bap = ap + bs2*_i + bs*cidx + ridx; \ 128 if (addv == ADD_VALUES) *bap += value; \ 129 else *bap = value; \ 130 goto a_noinsert; \ 131 } \ 132 } \ 133 if (a->nonew == 1) goto a_noinsert; \ 134 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 136 N = nrow++ - 1; \ 137 /* shift up all the later entries in this row */ \ 138 for (ii=N; ii>=_i; ii--) { \ 139 rp[ii+1] = rp[ii]; \ 140 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141 } \ 142 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 143 rp[_i] = bcol; \ 144 ap[bs2*_i + bs*cidx + ridx] = value; \ 145 a_noinsert:; \ 146 ailen[brow] = nrow; \ 147 } 148 149 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 150 { \ 151 brow = row/bs; \ 152 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 153 rmax = bimax[brow]; nrow = bilen[brow]; \ 154 bcol = col/bs; \ 155 ridx = row % bs; cidx = col % bs; \ 156 low = 0; high = nrow; \ 157 while (high-low > 3) { \ 158 t = (low+high)/2; \ 159 if (rp[t] > bcol) high = t; \ 160 else low = t; \ 161 } \ 162 for (_i=low; _i<high; _i++) { \ 163 if (rp[_i] > bcol) break; \ 164 if (rp[_i] == bcol) { \ 165 bap = ap + bs2*_i + bs*cidx + ridx; \ 166 if (addv == ADD_VALUES) *bap += value; \ 167 else *bap = value; \ 168 goto b_noinsert; \ 169 } \ 170 } \ 171 if (b->nonew == 1) goto b_noinsert; \ 172 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 173 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 174 CHKMEMQ;\ 175 N = nrow++ - 1; \ 176 /* shift up all the later entries in this row */ \ 177 for (ii=N; ii>=_i; ii--) { \ 178 rp[ii+1] = rp[ii]; \ 179 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 180 } \ 181 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 182 rp[_i] = bcol; \ 183 ap[bs2*_i + bs*cidx + ridx] = value; \ 184 b_noinsert:; \ 185 bilen[brow] = nrow; \ 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatSetValues_MPIBAIJ" 190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 191 { 192 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 193 MatScalar value; 194 PetscBool roworiented = baij->roworiented; 195 PetscErrorCode ierr; 196 PetscInt i,j,row,col; 197 PetscInt rstart_orig=mat->rmap->rstart; 198 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 199 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 200 201 /* Some Variables required in the macro */ 202 Mat A = baij->A; 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 204 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 205 MatScalar *aa=a->a; 206 207 Mat B = baij->B; 208 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 209 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 210 MatScalar *ba=b->a; 211 212 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 213 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 214 MatScalar *ap,*bap; 215 216 PetscFunctionBegin; 217 if (v) PetscValidScalarPointer(v,6); 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_DEBUG) 221 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 222 #endif 223 if (im[i] >= rstart_orig && im[i] < rend_orig) { 224 row = im[i] - rstart_orig; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart_orig && in[j] < cend_orig){ 227 col = in[j] - cstart_orig; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } else if (in[j] < 0) continue; 232 #if defined(PETSC_USE_DEBUG) 233 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 234 #endif 235 else { 236 if (mat->was_assembled) { 237 if (!baij->colmap) { 238 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 239 } 240 #if defined (PETSC_USE_CTABLE) 241 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 242 col = col - 1; 243 #else 244 col = baij->colmap[in[j]/bs] - 1; 245 #endif 246 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 247 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 248 col = in[j]; 249 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 250 B = baij->B; 251 b = (Mat_SeqBAIJ*)(B)->data; 252 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 253 ba=b->a; 254 } else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 if (roworiented) { 265 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 266 } else { 267 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 268 } 269 } 270 } 271 } 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 278 { 279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscBool roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) { 320 barray[jj] = value[jj]; 321 } 322 barray += bs; 323 } 324 barray -= bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 387 { 388 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 389 PetscBool roworiented = baij->roworiented; 390 PetscErrorCode ierr; 391 PetscInt i,j,row,col; 392 PetscInt rstart_orig=mat->rmap->rstart; 393 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 394 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 395 PetscReal tmp; 396 MatScalar **HD = baij->hd,value; 397 #if defined(PETSC_USE_DEBUG) 398 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 399 #endif 400 401 PetscFunctionBegin; 402 if (v) PetscValidScalarPointer(v,6); 403 for (i=0; i<m; i++) { 404 #if defined(PETSC_USE_DEBUG) 405 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 406 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 407 #endif 408 row = im[i]; 409 if (row >= rstart_orig && row < rend_orig) { 410 for (j=0; j<n; j++) { 411 col = in[j]; 412 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 413 /* Look up PetscInto the Hash Table */ 414 key = (row/bs)*Nbs+(col/bs)+1; 415 h1 = HASH(size,key,tmp); 416 417 418 idx = h1; 419 #if defined(PETSC_USE_DEBUG) 420 insert_ct++; 421 total_ct++; 422 if (HT[idx] != key) { 423 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 424 if (idx == size) { 425 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 426 if (idx == h1) { 427 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 428 } 429 } 430 } 431 #else 432 if (HT[idx] != key) { 433 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 434 if (idx == size) { 435 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 436 if (idx == h1) { 437 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 438 } 439 } 440 } 441 #endif 442 /* A HASH table entry is found, so insert the values at the correct address */ 443 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 444 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 445 } 446 } else { 447 if (!baij->donotstash) { 448 if (roworiented) { 449 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 450 } else { 451 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 452 } 453 } 454 } 455 } 456 #if defined(PETSC_USE_DEBUG) 457 baij->ht_total_ct = total_ct; 458 baij->ht_insert_ct = insert_ct; 459 #endif 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 466 { 467 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 468 PetscBool roworiented = baij->roworiented; 469 PetscErrorCode ierr; 470 PetscInt i,j,ii,jj,row,col; 471 PetscInt rstart=baij->rstartbs; 472 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 473 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 474 PetscReal tmp; 475 MatScalar **HD = baij->hd,*baij_a; 476 const PetscScalar *v_t,*value; 477 #if defined(PETSC_USE_DEBUG) 478 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 479 #endif 480 481 PetscFunctionBegin; 482 483 if (roworiented) { 484 stepval = (n-1)*bs; 485 } else { 486 stepval = (m-1)*bs; 487 } 488 for (i=0; i<m; i++) { 489 #if defined(PETSC_USE_DEBUG) 490 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 491 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 492 #endif 493 row = im[i]; 494 v_t = v + i*nbs2; 495 if (row >= rstart && row < rend) { 496 for (j=0; j<n; j++) { 497 col = in[j]; 498 499 /* Look up into the Hash Table */ 500 key = row*Nbs+col+1; 501 h1 = HASH(size,key,tmp); 502 503 idx = h1; 504 #if defined(PETSC_USE_DEBUG) 505 total_ct++; 506 insert_ct++; 507 if (HT[idx] != key) { 508 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == size) { 510 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 511 if (idx == h1) { 512 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 513 } 514 } 515 } 516 #else 517 if (HT[idx] != key) { 518 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 519 if (idx == size) { 520 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 521 if (idx == h1) { 522 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 523 } 524 } 525 } 526 #endif 527 baij_a = HD[idx]; 528 if (roworiented) { 529 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 530 /* value = v + (i*(stepval+bs)+j)*bs; */ 531 value = v_t; 532 v_t += bs; 533 if (addv == ADD_VALUES) { 534 for (ii=0; ii<bs; ii++,value+=stepval) { 535 for (jj=ii; jj<bs2; jj+=bs) { 536 baij_a[jj] += *value++; 537 } 538 } 539 } else { 540 for (ii=0; ii<bs; ii++,value+=stepval) { 541 for (jj=ii; jj<bs2; jj+=bs) { 542 baij_a[jj] = *value++; 543 } 544 } 545 } 546 } else { 547 value = v + j*(stepval+bs)*bs + i*bs; 548 if (addv == ADD_VALUES) { 549 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 550 for (jj=0; jj<bs; jj++) { 551 baij_a[jj] += *value++; 552 } 553 } 554 } else { 555 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 556 for (jj=0; jj<bs; jj++) { 557 baij_a[jj] = *value++; 558 } 559 } 560 } 561 } 562 } 563 } else { 564 if (!baij->donotstash) { 565 if (roworiented) { 566 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } else { 568 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 569 } 570 } 571 } 572 } 573 #if defined(PETSC_USE_DEBUG) 574 baij->ht_total_ct = total_ct; 575 baij->ht_insert_ct = insert_ct; 576 #endif 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatGetValues_MPIBAIJ" 582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 583 { 584 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 585 PetscErrorCode ierr; 586 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 587 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 588 589 PetscFunctionBegin; 590 for (i=0; i<m; i++) { 591 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 592 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 593 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 594 row = idxm[i] - bsrstart; 595 for (j=0; j<n; j++) { 596 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 597 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 598 if (idxn[j] >= bscstart && idxn[j] < bscend){ 599 col = idxn[j] - bscstart; 600 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 601 } else { 602 if (!baij->colmap) { 603 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 604 } 605 #if defined (PETSC_USE_CTABLE) 606 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 607 data --; 608 #else 609 data = baij->colmap[idxn[j]/bs]-1; 610 #endif 611 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 612 else { 613 col = data + idxn[j]%bs; 614 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 615 } 616 } 617 } 618 } else { 619 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 } 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatNorm_MPIBAIJ" 627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 628 { 629 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 630 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 633 PetscReal sum = 0.0; 634 MatScalar *v; 635 636 PetscFunctionBegin; 637 if (baij->size == 1) { 638 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 639 } else { 640 if (type == NORM_FROBENIUS) { 641 v = amat->a; 642 nz = amat->nz*bs2; 643 for (i=0; i<nz; i++) { 644 #if defined(PETSC_USE_COMPLEX) 645 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 646 #else 647 sum += (*v)*(*v); v++; 648 #endif 649 } 650 v = bmat->a; 651 nz = bmat->nz*bs2; 652 for (i=0; i<nz; i++) { 653 #if defined(PETSC_USE_COMPLEX) 654 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 660 *nrm = sqrt(*nrm); 661 } else if (type == NORM_1) { /* max column sum */ 662 PetscReal *tmp,*tmp2; 663 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 664 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 665 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 666 v = amat->a; jj = amat->j; 667 for (i=0; i<amat->nz; i++) { 668 for (j=0; j<bs; j++){ 669 col = bs*(cstart + *jj) + j; /* column index */ 670 for (row=0; row<bs; row++){ 671 tmp[col] += PetscAbsScalar(*v); v++; 672 } 673 } 674 jj++; 675 } 676 v = bmat->a; jj = bmat->j; 677 for (i=0; i<bmat->nz; i++) { 678 for (j=0; j<bs; j++){ 679 col = bs*garray[*jj] + j; 680 for (row=0; row<bs; row++){ 681 tmp[col] += PetscAbsScalar(*v); v++; 682 } 683 } 684 jj++; 685 } 686 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 687 *nrm = 0.0; 688 for (j=0; j<mat->cmap->N; j++) { 689 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 690 } 691 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 692 } else if (type == NORM_INFINITY) { /* max row sum */ 693 PetscReal *sums; 694 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 695 sum = 0.0; 696 for (j=0; j<amat->mbs; j++) { 697 for (row=0; row<bs; row++) sums[row] = 0.0; 698 v = amat->a + bs2*amat->i[j]; 699 nz = amat->i[j+1]-amat->i[j]; 700 for (i=0; i<nz; i++) { 701 for (col=0; col<bs; col++){ 702 for (row=0; row<bs; row++){ 703 sums[row] += PetscAbsScalar(*v); v++; 704 } 705 } 706 } 707 v = bmat->a + bs2*bmat->i[j]; 708 nz = bmat->i[j+1]-bmat->i[j]; 709 for (i=0; i<nz; i++) { 710 for (col=0; col<bs; col++){ 711 for (row=0; row<bs; row++){ 712 sums[row] += PetscAbsScalar(*v); v++; 713 } 714 } 715 } 716 for (row=0; row<bs; row++){ 717 if (sums[row] > sum) sum = sums[row]; 718 } 719 } 720 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 721 ierr = PetscFree(sums);CHKERRQ(ierr); 722 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Creates the hash table, and sets the table 729 This table is created only once. 730 If new entried need to be added to the matrix 731 then the hash table has to be destroyed and 732 recreated. 733 */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 737 { 738 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 739 Mat A = baij->A,B=baij->B; 740 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 741 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 742 PetscErrorCode ierr; 743 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 744 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 745 PetscInt *HT,key; 746 MatScalar **HD; 747 PetscReal tmp; 748 #if defined(PETSC_USE_INFO) 749 PetscInt ct=0,max=0; 750 #endif 751 752 PetscFunctionBegin; 753 if (baij->ht) PetscFunctionReturn(0); 754 755 baij->ht_size = (PetscInt)(factor*nz); 756 ht_size = baij->ht_size; 757 758 /* Allocate Memory for Hash Table */ 759 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 760 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 761 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 762 HD = baij->hd; 763 HT = baij->ht; 764 765 /* Loop Over A */ 766 for (i=0; i<a->mbs; i++) { 767 for (j=ai[i]; j<ai[i+1]; j++) { 768 row = i+rstart; 769 col = aj[j]+cstart; 770 771 key = row*Nbs + col + 1; 772 h1 = HASH(ht_size,key,tmp); 773 for (k=0; k<ht_size; k++){ 774 if (!HT[(h1+k)%ht_size]) { 775 HT[(h1+k)%ht_size] = key; 776 HD[(h1+k)%ht_size] = a->a + j*bs2; 777 break; 778 #if defined(PETSC_USE_INFO) 779 } else { 780 ct++; 781 #endif 782 } 783 } 784 #if defined(PETSC_USE_INFO) 785 if (k> max) max = k; 786 #endif 787 } 788 } 789 /* Loop Over B */ 790 for (i=0; i<b->mbs; i++) { 791 for (j=bi[i]; j<bi[i+1]; j++) { 792 row = i+rstart; 793 col = garray[bj[j]]; 794 key = row*Nbs + col + 1; 795 h1 = HASH(ht_size,key,tmp); 796 for (k=0; k<ht_size; k++){ 797 if (!HT[(h1+k)%ht_size]) { 798 HT[(h1+k)%ht_size] = key; 799 HD[(h1+k)%ht_size] = b->a + j*bs2; 800 break; 801 #if defined(PETSC_USE_INFO) 802 } else { 803 ct++; 804 #endif 805 } 806 } 807 #if defined(PETSC_USE_INFO) 808 if (k> max) max = k; 809 #endif 810 } 811 } 812 813 /* Print Summary */ 814 #if defined(PETSC_USE_INFO) 815 for (i=0,j=0; i<ht_size; i++) { 816 if (HT[i]) {j++;} 817 } 818 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 819 #endif 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 826 { 827 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 828 PetscErrorCode ierr; 829 PetscInt nstash,reallocs; 830 InsertMode addv; 831 832 PetscFunctionBegin; 833 if (baij->donotstash || mat->nooffprocentries) { 834 PetscFunctionReturn(0); 835 } 836 837 /* make sure all processors are either in INSERTMODE or ADDMODE */ 838 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 839 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 840 mat->insertmode = addv; /* in case this processor had no cache */ 841 842 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 843 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 844 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 845 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 846 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 847 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 854 { 855 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 856 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 857 PetscErrorCode ierr; 858 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 859 PetscInt *row,*col; 860 PetscBool r1,r2,r3,other_disassembled; 861 MatScalar *val; 862 InsertMode addv = mat->insertmode; 863 PetscMPIInt n; 864 865 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 866 PetscFunctionBegin; 867 if (!baij->donotstash && !mat->nooffprocentries) { 868 while (1) { 869 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 870 if (!flg) break; 871 872 for (i=0; i<n;) { 873 /* Now identify the consecutive vals belonging to the same row */ 874 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 875 if (j < n) ncols = j-i; 876 else ncols = n-i; 877 /* Now assemble all these values with a single function call */ 878 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 879 i = j; 880 } 881 } 882 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 883 /* Now process the block-stash. Since the values are stashed column-oriented, 884 set the roworiented flag to column oriented, and after MatSetValues() 885 restore the original flags */ 886 r1 = baij->roworiented; 887 r2 = a->roworiented; 888 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 889 baij->roworiented = PETSC_FALSE; 890 a->roworiented = PETSC_FALSE; 891 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 892 while (1) { 893 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 894 if (!flg) break; 895 896 for (i=0; i<n;) { 897 /* Now identify the consecutive vals belonging to the same row */ 898 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 899 if (j < n) ncols = j-i; 900 else ncols = n-i; 901 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 902 i = j; 903 } 904 } 905 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 906 baij->roworiented = r1; 907 a->roworiented = r2; 908 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 909 } 910 911 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 912 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 913 914 /* determine if any processor has disassembled, if so we must 915 also disassemble ourselfs, in order that we may reassemble. */ 916 /* 917 if nonzero structure of submatrix B cannot change then we know that 918 no processor disassembled thus we can skip this stuff 919 */ 920 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 921 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 922 if (mat->was_assembled && !other_disassembled) { 923 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 924 } 925 } 926 927 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 928 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 929 } 930 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 931 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 932 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 933 934 #if defined(PETSC_USE_INFO) 935 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 936 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 937 baij->ht_total_ct = 0; 938 baij->ht_insert_ct = 0; 939 } 940 #endif 941 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 942 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 943 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 944 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 945 } 946 947 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 948 baij->rowvalues = 0; 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 955 { 956 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 957 PetscErrorCode ierr; 958 PetscMPIInt size = baij->size,rank = baij->rank; 959 PetscInt bs = mat->rmap->bs; 960 PetscBool iascii,isdraw; 961 PetscViewer sviewer; 962 PetscViewerFormat format; 963 964 PetscFunctionBegin; 965 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 966 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 967 if (iascii) { 968 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 969 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 970 MatInfo info; 971 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 972 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 973 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 974 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 979 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 980 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 981 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } else if (format == PETSC_VIEWER_ASCII_INFO) { 984 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 987 PetscFunctionReturn(0); 988 } 989 } 990 991 if (isdraw) { 992 PetscDraw draw; 993 PetscBool isnull; 994 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 995 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 996 } 997 998 if (size == 1) { 999 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1000 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1001 } else { 1002 /* assemble the entire matrix onto first processor. */ 1003 Mat A; 1004 Mat_SeqBAIJ *Aloc; 1005 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1006 MatScalar *a; 1007 1008 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1009 /* Perhaps this should be the type of mat? */ 1010 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1011 if (!rank) { 1012 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1013 } else { 1014 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1015 } 1016 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1017 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1018 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1019 1020 /* copy over the A part */ 1021 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1022 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1023 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1024 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = (baij->cstartbs+aj[j])*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 /* copy over the B part */ 1037 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1038 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1039 for (i=0; i<mbs; i++) { 1040 rvals[0] = bs*(baij->rstartbs + i); 1041 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1042 for (j=ai[i]; j<ai[i+1]; j++) { 1043 col = baij->garray[aj[j]]*bs; 1044 for (k=0; k<bs; k++) { 1045 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1046 col++; a += bs; 1047 } 1048 } 1049 } 1050 ierr = PetscFree(rvals);CHKERRQ(ierr); 1051 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 /* 1054 Everyone has to call to draw the matrix since the graphics waits are 1055 synchronized across all processors that share the PetscDraw object 1056 */ 1057 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1058 if (!rank) { 1059 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1060 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1061 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1062 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1063 } 1064 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1065 ierr = MatDestroy(A);CHKERRQ(ierr); 1066 } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1073 { 1074 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1075 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1076 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1077 PetscErrorCode ierr; 1078 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen; 1079 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1080 int fd; 1081 PetscScalar *column_values; 1082 FILE *file; 1083 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1084 1085 PetscFunctionBegin; 1086 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1087 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1088 nz = bs2*(A->nz + B->nz); 1089 rlen = mat->rmap->n; 1090 if (!rank) { 1091 header[0] = MAT_FILE_CLASSID; 1092 header[1] = mat->rmap->N; 1093 header[2] = mat->cmap->N; 1094 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1095 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1096 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1097 /* get largest number of rows any processor has */ 1098 range = mat->rmap->range; 1099 for (i=1; i<size; i++) { 1100 rlen = PetscMax(rlen,range[i+1] - range[i]); 1101 } 1102 } else { 1103 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1104 } 1105 1106 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1107 /* compute lengths of each row */ 1108 count = 0; 1109 for (i=0; i<a->mbs; i++) { 1110 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1111 } 1112 /* store the row lengths to the file */ 1113 if (!rank) { 1114 MPI_Status status; 1115 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1116 rlen = (range[1] - range[0])/bs; 1117 for (i=0; i<rlen; i++) { 1118 for (j=0; j<bs; j++) { 1119 row_lens[i*bs+j] = bs*crow_lens[i]; 1120 } 1121 } 1122 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1123 for (i=1; i<size; i++) { 1124 rlen = (range[i+1] - range[i])/bs; 1125 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1126 for (k=0; k<rlen; k++) { 1127 for (j=0; j<bs; j++) { 1128 row_lens[k*bs+j] = bs*crow_lens[k]; 1129 } 1130 } 1131 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1132 } 1133 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1134 } else { 1135 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1136 } 1137 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1138 1139 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1140 information needed to make it for each row from a block row. This does require more communication but still not more than 1141 the communication needed for the nonzero values */ 1142 nzmax = nz; /* space a largest processor needs */ 1143 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1144 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1145 cnt = 0; 1146 for (i=0; i<a->mbs; i++) { 1147 pcnt = cnt; 1148 for (j=B->i[i]; j<B->i[i+1]; j++) { 1149 if ( (col = garray[B->j[j]]) > cstart) break; 1150 for (l=0; l<bs; l++) { 1151 column_indices[cnt++] = bs*col+l; 1152 } 1153 } 1154 for (k=A->i[i]; k<A->i[i+1]; k++) { 1155 for (l=0; l<bs; l++) { 1156 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1157 } 1158 } 1159 for (; j<B->i[i+1]; j++) { 1160 for (l=0; l<bs; l++) { 1161 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1162 } 1163 } 1164 len = cnt - pcnt; 1165 for (k=1; k<bs; k++) { 1166 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1167 cnt += len; 1168 } 1169 } 1170 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1171 1172 /* store the columns to the file */ 1173 if (!rank) { 1174 MPI_Status status; 1175 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1176 for (i=1; i<size; i++) { 1177 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1178 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1179 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1180 } 1181 } else { 1182 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1183 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1184 } 1185 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1186 1187 /* load up the numerical values */ 1188 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1189 cnt = 0; 1190 for (i=0; i<a->mbs; i++) { 1191 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1192 for (j=B->i[i]; j<B->i[i+1]; j++) { 1193 if ( garray[B->j[j]] > cstart) break; 1194 for (l=0; l<bs; l++) { 1195 for (ll=0; ll<bs; ll++) { 1196 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1197 } 1198 } 1199 cnt += bs; 1200 } 1201 for (k=A->i[i]; k<A->i[i+1]; k++) { 1202 for (l=0; l<bs; l++) { 1203 for (ll=0; ll<bs; ll++) { 1204 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1205 } 1206 } 1207 cnt += bs; 1208 } 1209 for (; j<B->i[i+1]; j++) { 1210 for (l=0; l<bs; l++) { 1211 for (ll=0; ll<bs; ll++) { 1212 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1213 } 1214 } 1215 cnt += bs; 1216 } 1217 cnt += (bs-1)*rlen; 1218 } 1219 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1220 1221 /* store the column values to the file */ 1222 if (!rank) { 1223 MPI_Status status; 1224 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1225 for (i=1; i<size; i++) { 1226 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1227 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1228 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1229 } 1230 } else { 1231 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1232 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(column_values);CHKERRQ(ierr); 1235 1236 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1237 if (file) { 1238 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1239 } 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatView_MPIBAIJ" 1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1246 { 1247 PetscErrorCode ierr; 1248 PetscBool iascii,isdraw,issocket,isbinary; 1249 1250 PetscFunctionBegin; 1251 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1252 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1253 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1254 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1255 if (iascii || isdraw || issocket) { 1256 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1257 } else if (isbinary) { 1258 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1259 } else { 1260 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1267 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1268 { 1269 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1270 PetscErrorCode ierr; 1271 1272 PetscFunctionBegin; 1273 #if defined(PETSC_USE_LOG) 1274 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1275 #endif 1276 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1277 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1278 if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);} 1279 if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);} 1280 #if defined (PETSC_USE_CTABLE) 1281 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1282 #else 1283 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1284 #endif 1285 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1286 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1287 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1288 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1289 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1290 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1291 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1292 ierr = PetscFree(baij);CHKERRQ(ierr); 1293 1294 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatMult_MPIBAIJ" 1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1308 { 1309 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 PetscInt nt; 1312 1313 PetscFunctionBegin; 1314 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1315 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1316 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1317 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1318 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1319 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1320 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1321 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1328 { 1329 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1335 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1343 { 1344 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1345 PetscErrorCode ierr; 1346 PetscBool merged; 1347 1348 PetscFunctionBegin; 1349 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1350 /* do nondiagonal part */ 1351 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1352 if (!merged) { 1353 /* send it on its way */ 1354 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1355 /* do local part */ 1356 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1357 /* receive remote parts: note this assumes the values are not actually */ 1358 /* inserted in yy until the next line */ 1359 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1360 } else { 1361 /* do local part */ 1362 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1363 /* send it on its way */ 1364 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1365 /* values actually were received in the Begin() but we need to call this nop */ 1366 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 } 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1374 { 1375 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1376 PetscErrorCode ierr; 1377 1378 PetscFunctionBegin; 1379 /* do nondiagonal part */ 1380 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1381 /* send it on its way */ 1382 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 /* do local part */ 1384 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1385 /* receive remote parts: note this assumes the values are not actually */ 1386 /* inserted in yy until the next line, which is true for my implementation*/ 1387 /* but is not perhaps always true. */ 1388 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 /* 1393 This only works correctly for square matrices where the subblock A->A is the 1394 diagonal block 1395 */ 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1399 { 1400 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1405 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatScale_MPIBAIJ" 1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1412 { 1413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1414 PetscErrorCode ierr; 1415 1416 PetscFunctionBegin; 1417 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1418 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1425 { 1426 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1427 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1428 PetscErrorCode ierr; 1429 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1430 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1431 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1432 1433 PetscFunctionBegin; 1434 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1435 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1436 mat->getrowactive = PETSC_TRUE; 1437 1438 if (!mat->rowvalues && (idx || v)) { 1439 /* 1440 allocate enough space to hold information from the longest row. 1441 */ 1442 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1443 PetscInt max = 1,mbs = mat->mbs,tmp; 1444 for (i=0; i<mbs; i++) { 1445 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1446 if (max < tmp) { max = tmp; } 1447 } 1448 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1449 } 1450 lrow = row - brstart; 1451 1452 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1453 if (!v) {pvA = 0; pvB = 0;} 1454 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1455 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1456 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1457 nztot = nzA + nzB; 1458 1459 cmap = mat->garray; 1460 if (v || idx) { 1461 if (nztot) { 1462 /* Sort by increasing column numbers, assuming A and B already sorted */ 1463 PetscInt imark = -1; 1464 if (v) { 1465 *v = v_p = mat->rowvalues; 1466 for (i=0; i<nzB; i++) { 1467 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1468 else break; 1469 } 1470 imark = i; 1471 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1472 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1473 } 1474 if (idx) { 1475 *idx = idx_p = mat->rowindices; 1476 if (imark > -1) { 1477 for (i=0; i<imark; i++) { 1478 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1479 } 1480 } else { 1481 for (i=0; i<nzB; i++) { 1482 if (cmap[cworkB[i]/bs] < cstart) 1483 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1484 else break; 1485 } 1486 imark = i; 1487 } 1488 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1489 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1490 } 1491 } else { 1492 if (idx) *idx = 0; 1493 if (v) *v = 0; 1494 } 1495 } 1496 *nz = nztot; 1497 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1498 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1505 { 1506 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1507 1508 PetscFunctionBegin; 1509 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1510 baij->getrowactive = PETSC_FALSE; 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1517 { 1518 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1519 PetscErrorCode ierr; 1520 1521 PetscFunctionBegin; 1522 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1523 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1530 { 1531 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1532 Mat A = a->A,B = a->B; 1533 PetscErrorCode ierr; 1534 PetscReal isend[5],irecv[5]; 1535 1536 PetscFunctionBegin; 1537 info->block_size = (PetscReal)matin->rmap->bs; 1538 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1539 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1540 isend[3] = info->memory; isend[4] = info->mallocs; 1541 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1542 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1543 isend[3] += info->memory; isend[4] += info->mallocs; 1544 if (flag == MAT_LOCAL) { 1545 info->nz_used = isend[0]; 1546 info->nz_allocated = isend[1]; 1547 info->nz_unneeded = isend[2]; 1548 info->memory = isend[3]; 1549 info->mallocs = isend[4]; 1550 } else if (flag == MAT_GLOBAL_MAX) { 1551 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1552 info->nz_used = irecv[0]; 1553 info->nz_allocated = irecv[1]; 1554 info->nz_unneeded = irecv[2]; 1555 info->memory = irecv[3]; 1556 info->mallocs = irecv[4]; 1557 } else if (flag == MAT_GLOBAL_SUM) { 1558 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1559 info->nz_used = irecv[0]; 1560 info->nz_allocated = irecv[1]; 1561 info->nz_unneeded = irecv[2]; 1562 info->memory = irecv[3]; 1563 info->mallocs = irecv[4]; 1564 } else { 1565 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1566 } 1567 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1568 info->fill_ratio_needed = 0; 1569 info->factor_mallocs = 0; 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1575 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1576 { 1577 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1578 PetscErrorCode ierr; 1579 1580 PetscFunctionBegin; 1581 switch (op) { 1582 case MAT_NEW_NONZERO_LOCATIONS: 1583 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1584 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1585 case MAT_KEEP_NONZERO_PATTERN: 1586 case MAT_NEW_NONZERO_LOCATION_ERR: 1587 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1588 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1589 break; 1590 case MAT_ROW_ORIENTED: 1591 a->roworiented = flg; 1592 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1593 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1594 break; 1595 case MAT_NEW_DIAGONALS: 1596 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1597 break; 1598 case MAT_IGNORE_OFF_PROC_ENTRIES: 1599 a->donotstash = flg; 1600 break; 1601 case MAT_USE_HASH_TABLE: 1602 a->ht_flag = flg; 1603 break; 1604 case MAT_SYMMETRIC: 1605 case MAT_STRUCTURALLY_SYMMETRIC: 1606 case MAT_HERMITIAN: 1607 case MAT_SYMMETRY_ETERNAL: 1608 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1609 break; 1610 default: 1611 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1612 } 1613 PetscFunctionReturn(0); 1614 } 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1618 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1619 { 1620 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1621 Mat_SeqBAIJ *Aloc; 1622 Mat B; 1623 PetscErrorCode ierr; 1624 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1625 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1626 MatScalar *a; 1627 1628 PetscFunctionBegin; 1629 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1630 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1631 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1632 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1633 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1634 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1635 } else { 1636 B = *matout; 1637 } 1638 1639 /* copy over the A part */ 1640 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1641 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1642 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1643 1644 for (i=0; i<mbs; i++) { 1645 rvals[0] = bs*(baij->rstartbs + i); 1646 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1647 for (j=ai[i]; j<ai[i+1]; j++) { 1648 col = (baij->cstartbs+aj[j])*bs; 1649 for (k=0; k<bs; k++) { 1650 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1651 col++; a += bs; 1652 } 1653 } 1654 } 1655 /* copy over the B part */ 1656 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1657 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1658 for (i=0; i<mbs; i++) { 1659 rvals[0] = bs*(baij->rstartbs + i); 1660 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1661 for (j=ai[i]; j<ai[i+1]; j++) { 1662 col = baij->garray[aj[j]]*bs; 1663 for (k=0; k<bs; k++) { 1664 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1665 col++; a += bs; 1666 } 1667 } 1668 } 1669 ierr = PetscFree(rvals);CHKERRQ(ierr); 1670 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 1673 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1674 *matout = B; 1675 } else { 1676 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1683 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1684 { 1685 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1686 Mat a = baij->A,b = baij->B; 1687 PetscErrorCode ierr; 1688 PetscInt s1,s2,s3; 1689 1690 PetscFunctionBegin; 1691 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1692 if (rr) { 1693 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1694 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1695 /* Overlap communication with computation. */ 1696 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1697 } 1698 if (ll) { 1699 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1700 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1701 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1702 } 1703 /* scale the diagonal block */ 1704 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1705 1706 if (rr) { 1707 /* Do a scatter end and then right scale the off-diagonal block */ 1708 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1709 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1710 } 1711 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1717 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1718 { 1719 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1720 PetscErrorCode ierr; 1721 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1722 PetscInt i,*owners = A->rmap->range; 1723 PetscInt *nprocs,j,idx,nsends,row; 1724 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1725 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1726 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1727 MPI_Comm comm = ((PetscObject)A)->comm; 1728 MPI_Request *send_waits,*recv_waits; 1729 MPI_Status recv_status,*send_status; 1730 #if defined(PETSC_DEBUG) 1731 PetscBool found = PETSC_FALSE; 1732 #endif 1733 1734 PetscFunctionBegin; 1735 /* first count number of contributors to each processor */ 1736 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1737 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1738 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1739 j = 0; 1740 for (i=0; i<N; i++) { 1741 if (lastidx > (idx = rows[i])) j = 0; 1742 lastidx = idx; 1743 for (; j<size; j++) { 1744 if (idx >= owners[j] && idx < owners[j+1]) { 1745 nprocs[2*j]++; 1746 nprocs[2*j+1] = 1; 1747 owner[i] = j; 1748 #if defined(PETSC_DEBUG) 1749 found = PETSC_TRUE; 1750 #endif 1751 break; 1752 } 1753 } 1754 #if defined(PETSC_DEBUG) 1755 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1756 found = PETSC_FALSE; 1757 #endif 1758 } 1759 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1760 1761 if (A->nooffproczerorows) { 1762 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1763 nrecvs = nsends; 1764 nmax = N; 1765 } else { 1766 /* inform other processors of number of messages and max length*/ 1767 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1768 } 1769 1770 /* post receives: */ 1771 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1772 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1773 for (i=0; i<nrecvs; i++) { 1774 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1775 } 1776 1777 /* do sends: 1778 1) starts[i] gives the starting index in svalues for stuff going to 1779 the ith processor 1780 */ 1781 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1782 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1783 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1784 starts[0] = 0; 1785 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1786 for (i=0; i<N; i++) { 1787 svalues[starts[owner[i]]++] = rows[i]; 1788 } 1789 1790 starts[0] = 0; 1791 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1792 count = 0; 1793 for (i=0; i<size; i++) { 1794 if (nprocs[2*i+1]) { 1795 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1796 } 1797 } 1798 ierr = PetscFree(starts);CHKERRQ(ierr); 1799 1800 base = owners[rank]; 1801 1802 /* wait on receives */ 1803 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1804 count = nrecvs; 1805 slen = 0; 1806 while (count) { 1807 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1808 /* unpack receives into our local space */ 1809 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1810 source[imdex] = recv_status.MPI_SOURCE; 1811 lens[imdex] = n; 1812 slen += n; 1813 count--; 1814 } 1815 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1816 1817 /* move the data into the send scatter */ 1818 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1819 count = 0; 1820 for (i=0; i<nrecvs; i++) { 1821 values = rvalues + i*nmax; 1822 for (j=0; j<lens[i]; j++) { 1823 lrows[count++] = values[j] - base; 1824 } 1825 } 1826 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1827 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1828 ierr = PetscFree(owner);CHKERRQ(ierr); 1829 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1830 1831 /* actually zap the local rows */ 1832 /* 1833 Zero the required rows. If the "diagonal block" of the matrix 1834 is square and the user wishes to set the diagonal we use separate 1835 code so that MatSetValues() is not called for each diagonal allocating 1836 new memory, thus calling lots of mallocs and slowing things down. 1837 1838 */ 1839 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1840 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1841 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1842 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1843 } else if (diag != 0.0) { 1844 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1845 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1846 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1847 for (i=0; i<slen; i++) { 1848 row = lrows[i] + rstart_bs; 1849 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1850 } 1851 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1852 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1853 } else { 1854 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1855 } 1856 1857 ierr = PetscFree(lrows);CHKERRQ(ierr); 1858 1859 /* wait on sends */ 1860 if (nsends) { 1861 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1862 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1863 ierr = PetscFree(send_status);CHKERRQ(ierr); 1864 } 1865 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1866 ierr = PetscFree(svalues);CHKERRQ(ierr); 1867 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1873 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1874 { 1875 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1876 PetscErrorCode ierr; 1877 1878 PetscFunctionBegin; 1879 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1880 PetscFunctionReturn(0); 1881 } 1882 1883 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatEqual_MPIBAIJ" 1887 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1888 { 1889 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1890 Mat a,b,c,d; 1891 PetscBool flg; 1892 PetscErrorCode ierr; 1893 1894 PetscFunctionBegin; 1895 a = matA->A; b = matA->B; 1896 c = matB->A; d = matB->B; 1897 1898 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1899 if (flg) { 1900 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1901 } 1902 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #undef __FUNCT__ 1907 #define __FUNCT__ "MatCopy_MPIBAIJ" 1908 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1909 { 1910 PetscErrorCode ierr; 1911 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1912 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1913 1914 PetscFunctionBegin; 1915 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1916 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1917 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1918 } else { 1919 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1920 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1921 } 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1927 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1928 { 1929 PetscErrorCode ierr; 1930 1931 PetscFunctionBegin; 1932 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1933 PetscFunctionReturn(0); 1934 } 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1938 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1939 { 1940 PetscErrorCode ierr; 1941 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1942 PetscBLASInt bnz,one=1; 1943 Mat_SeqBAIJ *x,*y; 1944 1945 PetscFunctionBegin; 1946 if (str == SAME_NONZERO_PATTERN) { 1947 PetscScalar alpha = a; 1948 x = (Mat_SeqBAIJ *)xx->A->data; 1949 y = (Mat_SeqBAIJ *)yy->A->data; 1950 bnz = PetscBLASIntCast(x->nz); 1951 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1952 x = (Mat_SeqBAIJ *)xx->B->data; 1953 y = (Mat_SeqBAIJ *)yy->B->data; 1954 bnz = PetscBLASIntCast(x->nz); 1955 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1956 } else { 1957 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1958 } 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1964 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1965 { 1966 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1967 PetscInt rbs,cbs; 1968 PetscErrorCode ierr; 1969 1970 PetscFunctionBegin; 1971 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1972 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1973 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1974 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1975 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1976 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1982 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1983 { 1984 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1985 PetscErrorCode ierr; 1986 1987 PetscFunctionBegin; 1988 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1989 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 #undef __FUNCT__ 1994 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1995 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1996 { 1997 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1998 PetscErrorCode ierr; 1999 2000 PetscFunctionBegin; 2001 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2002 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2008 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2009 { 2010 PetscErrorCode ierr; 2011 IS iscol_local; 2012 PetscInt csize; 2013 2014 PetscFunctionBegin; 2015 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2016 if (call == MAT_REUSE_MATRIX) { 2017 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2018 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2019 } else { 2020 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2021 } 2022 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2023 if (call == MAT_INITIAL_MATRIX) { 2024 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2025 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 2026 } 2027 PetscFunctionReturn(0); 2028 } 2029 2030 #undef __FUNCT__ 2031 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2032 /* 2033 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2034 in local and then by concatenating the local matrices the end result. 2035 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2036 */ 2037 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2038 { 2039 PetscErrorCode ierr; 2040 PetscMPIInt rank,size; 2041 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2042 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2043 Mat *local,M,Mreuse; 2044 MatScalar *vwork,*aa; 2045 MPI_Comm comm = ((PetscObject)mat)->comm; 2046 Mat_SeqBAIJ *aij; 2047 2048 2049 PetscFunctionBegin; 2050 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2051 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2052 2053 if (call == MAT_REUSE_MATRIX) { 2054 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2055 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2056 local = &Mreuse; 2057 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2058 } else { 2059 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2060 Mreuse = *local; 2061 ierr = PetscFree(local);CHKERRQ(ierr); 2062 } 2063 2064 /* 2065 m - number of local rows 2066 n - number of columns (same on all processors) 2067 rstart - first row in new global matrix generated 2068 */ 2069 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2070 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2071 m = m/bs; 2072 n = n/bs; 2073 2074 if (call == MAT_INITIAL_MATRIX) { 2075 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2076 ii = aij->i; 2077 jj = aij->j; 2078 2079 /* 2080 Determine the number of non-zeros in the diagonal and off-diagonal 2081 portions of the matrix in order to do correct preallocation 2082 */ 2083 2084 /* first get start and end of "diagonal" columns */ 2085 if (csize == PETSC_DECIDE) { 2086 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2087 if (mglobal == n*bs) { /* square matrix */ 2088 nlocal = m; 2089 } else { 2090 nlocal = n/size + ((n % size) > rank); 2091 } 2092 } else { 2093 nlocal = csize/bs; 2094 } 2095 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2096 rstart = rend - nlocal; 2097 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2098 2099 /* next, compute all the lengths */ 2100 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2101 olens = dlens + m; 2102 for (i=0; i<m; i++) { 2103 jend = ii[i+1] - ii[i]; 2104 olen = 0; 2105 dlen = 0; 2106 for (j=0; j<jend; j++) { 2107 if (*jj < rstart || *jj >= rend) olen++; 2108 else dlen++; 2109 jj++; 2110 } 2111 olens[i] = olen; 2112 dlens[i] = dlen; 2113 } 2114 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2115 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2116 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2117 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2118 ierr = PetscFree(dlens);CHKERRQ(ierr); 2119 } else { 2120 PetscInt ml,nl; 2121 2122 M = *newmat; 2123 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2124 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2125 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2126 /* 2127 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2128 rather than the slower MatSetValues(). 2129 */ 2130 M->was_assembled = PETSC_TRUE; 2131 M->assembled = PETSC_FALSE; 2132 } 2133 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2134 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2135 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2136 ii = aij->i; 2137 jj = aij->j; 2138 aa = aij->a; 2139 for (i=0; i<m; i++) { 2140 row = rstart/bs + i; 2141 nz = ii[i+1] - ii[i]; 2142 cwork = jj; jj += nz; 2143 vwork = aa; aa += nz; 2144 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2145 } 2146 2147 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2148 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2149 *newmat = M; 2150 2151 /* save submatrix used in processor for next request */ 2152 if (call == MAT_INITIAL_MATRIX) { 2153 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2154 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2155 } 2156 2157 PetscFunctionReturn(0); 2158 } 2159 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatPermute_MPIBAIJ" 2162 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2163 { 2164 MPI_Comm comm,pcomm; 2165 PetscInt first,local_size,nrows; 2166 const PetscInt *rows; 2167 PetscMPIInt size; 2168 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2173 /* make a collective version of 'rowp' */ 2174 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2175 if (pcomm==comm) { 2176 crowp = rowp; 2177 } else { 2178 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2179 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2180 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2181 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2182 } 2183 /* collect the global row permutation and invert it */ 2184 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2185 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2186 if (pcomm!=comm) { 2187 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2188 } 2189 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2190 /* get the local target indices */ 2191 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2192 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2193 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2194 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2195 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2196 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2197 /* the column permutation is so much easier; 2198 make a local version of 'colp' and invert it */ 2199 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2200 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2201 if (size==1) { 2202 lcolp = colp; 2203 } else { 2204 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2205 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2206 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2207 } 2208 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2209 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2210 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2211 if (size>1) { 2212 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2213 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2214 } 2215 /* now we just get the submatrix */ 2216 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2217 /* clean up */ 2218 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2219 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2220 PetscFunctionReturn(0); 2221 } 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2225 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2226 { 2227 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2228 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2229 2230 PetscFunctionBegin; 2231 if (nghosts) { *nghosts = B->nbs;} 2232 if (ghosts) {*ghosts = baij->garray;} 2233 PetscFunctionReturn(0); 2234 } 2235 2236 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2240 /* 2241 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2242 */ 2243 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2244 { 2245 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2246 PetscErrorCode ierr; 2247 PetscMPIInt size,*ncolsonproc,*disp,nn; 2248 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2249 const PetscInt *is; 2250 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2251 PetscInt *rowhit,M,cstart,cend,colb; 2252 PetscInt *columnsforrow,l; 2253 IS *isa; 2254 PetscBool done,flg; 2255 ISLocalToGlobalMapping map = mat->bmapping; 2256 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2257 2258 PetscFunctionBegin; 2259 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2260 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2261 2262 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2263 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2264 M = mat->rmap->n/bs; 2265 cstart = mat->cmap->rstart/bs; 2266 cend = mat->cmap->rend/bs; 2267 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2268 c->N = mat->cmap->N/bs; 2269 c->m = mat->rmap->n/bs; 2270 c->rstart = mat->rmap->rstart/bs; 2271 2272 c->ncolors = nis; 2273 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2274 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2275 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2276 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2277 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2278 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2279 2280 /* Allow access to data structures of local part of matrix */ 2281 if (!baij->colmap) { 2282 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2283 } 2284 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2285 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2286 2287 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2288 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2289 2290 for (i=0; i<nis; i++) { 2291 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2292 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2293 c->ncolumns[i] = n; 2294 if (n) { 2295 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2296 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2297 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2298 } else { 2299 c->columns[i] = 0; 2300 } 2301 2302 if (ctype == IS_COLORING_GLOBAL){ 2303 /* Determine the total (parallel) number of columns of this color */ 2304 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2305 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2306 2307 nn = PetscMPIIntCast(n); 2308 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2309 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2310 if (!nctot) { 2311 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2312 } 2313 2314 disp[0] = 0; 2315 for (j=1; j<size; j++) { 2316 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2317 } 2318 2319 /* Get complete list of columns for color on each processor */ 2320 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2321 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2322 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2323 } else if (ctype == IS_COLORING_GHOSTED){ 2324 /* Determine local number of columns of this color on this process, including ghost points */ 2325 nctot = n; 2326 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2327 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2328 } else { 2329 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2330 } 2331 2332 /* 2333 Mark all rows affect by these columns 2334 */ 2335 /* Temporary option to allow for debugging/testing */ 2336 flg = PETSC_FALSE; 2337 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2338 if (!flg) {/*-----------------------------------------------------------------------------*/ 2339 /* crude, fast version */ 2340 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2341 /* loop over columns*/ 2342 for (j=0; j<nctot; j++) { 2343 if (ctype == IS_COLORING_GHOSTED) { 2344 col = ltog[cols[j]]; 2345 } else { 2346 col = cols[j]; 2347 } 2348 if (col >= cstart && col < cend) { 2349 /* column is in diagonal block of matrix */ 2350 rows = A_cj + A_ci[col-cstart]; 2351 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2352 } else { 2353 #if defined (PETSC_USE_CTABLE) 2354 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2355 colb --; 2356 #else 2357 colb = baij->colmap[col] - 1; 2358 #endif 2359 if (colb == -1) { 2360 m = 0; 2361 } else { 2362 colb = colb/bs; 2363 rows = B_cj + B_ci[colb]; 2364 m = B_ci[colb+1] - B_ci[colb]; 2365 } 2366 } 2367 /* loop over columns marking them in rowhit */ 2368 for (k=0; k<m; k++) { 2369 rowhit[*rows++] = col + 1; 2370 } 2371 } 2372 2373 /* count the number of hits */ 2374 nrows = 0; 2375 for (j=0; j<M; j++) { 2376 if (rowhit[j]) nrows++; 2377 } 2378 c->nrows[i] = nrows; 2379 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2380 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2381 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2382 nrows = 0; 2383 for (j=0; j<M; j++) { 2384 if (rowhit[j]) { 2385 c->rows[i][nrows] = j; 2386 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2387 nrows++; 2388 } 2389 } 2390 } else {/*-------------------------------------------------------------------------------*/ 2391 /* slow version, using rowhit as a linked list */ 2392 PetscInt currentcol,fm,mfm; 2393 rowhit[M] = M; 2394 nrows = 0; 2395 /* loop over columns*/ 2396 for (j=0; j<nctot; j++) { 2397 if (ctype == IS_COLORING_GHOSTED) { 2398 col = ltog[cols[j]]; 2399 } else { 2400 col = cols[j]; 2401 } 2402 if (col >= cstart && col < cend) { 2403 /* column is in diagonal block of matrix */ 2404 rows = A_cj + A_ci[col-cstart]; 2405 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2406 } else { 2407 #if defined (PETSC_USE_CTABLE) 2408 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2409 colb --; 2410 #else 2411 colb = baij->colmap[col] - 1; 2412 #endif 2413 if (colb == -1) { 2414 m = 0; 2415 } else { 2416 colb = colb/bs; 2417 rows = B_cj + B_ci[colb]; 2418 m = B_ci[colb+1] - B_ci[colb]; 2419 } 2420 } 2421 2422 /* loop over columns marking them in rowhit */ 2423 fm = M; /* fm points to first entry in linked list */ 2424 for (k=0; k<m; k++) { 2425 currentcol = *rows++; 2426 /* is it already in the list? */ 2427 do { 2428 mfm = fm; 2429 fm = rowhit[fm]; 2430 } while (fm < currentcol); 2431 /* not in list so add it */ 2432 if (fm != currentcol) { 2433 nrows++; 2434 columnsforrow[currentcol] = col; 2435 /* next three lines insert new entry into linked list */ 2436 rowhit[mfm] = currentcol; 2437 rowhit[currentcol] = fm; 2438 fm = currentcol; 2439 /* fm points to present position in list since we know the columns are sorted */ 2440 } else { 2441 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2442 } 2443 } 2444 } 2445 c->nrows[i] = nrows; 2446 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2447 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2448 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2449 /* now store the linked list of rows into c->rows[i] */ 2450 nrows = 0; 2451 fm = rowhit[M]; 2452 do { 2453 c->rows[i][nrows] = fm; 2454 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2455 fm = rowhit[fm]; 2456 } while (fm < M); 2457 } /* ---------------------------------------------------------------------------------------*/ 2458 ierr = PetscFree(cols);CHKERRQ(ierr); 2459 } 2460 2461 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2462 /* 2463 vscale will contain the "diagonal" on processor scalings followed by the off processor 2464 */ 2465 if (ctype == IS_COLORING_GLOBAL) { 2466 PetscInt *garray; 2467 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2468 for (i=0; i<baij->B->cmap->n/bs; i++) { 2469 for (j=0; j<bs; j++) { 2470 garray[i*bs+j] = bs*baij->garray[i]+j; 2471 } 2472 } 2473 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2474 ierr = PetscFree(garray);CHKERRQ(ierr); 2475 CHKMEMQ; 2476 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2477 for (k=0; k<c->ncolors; k++) { 2478 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2479 for (l=0; l<c->nrows[k]; l++) { 2480 col = c->columnsforrow[k][l]; 2481 if (col >= cstart && col < cend) { 2482 /* column is in diagonal block of matrix */ 2483 colb = col - cstart; 2484 } else { 2485 /* column is in "off-processor" part */ 2486 #if defined (PETSC_USE_CTABLE) 2487 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2488 colb --; 2489 #else 2490 colb = baij->colmap[col] - 1; 2491 #endif 2492 colb = colb/bs; 2493 colb += cend - cstart; 2494 } 2495 c->vscaleforrow[k][l] = colb; 2496 } 2497 } 2498 } else if (ctype == IS_COLORING_GHOSTED) { 2499 /* Get gtol mapping */ 2500 PetscInt N = mat->cmap->N, *gtol; 2501 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2502 for (i=0; i<N; i++) gtol[i] = -1; 2503 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2504 2505 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2506 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2507 for (k=0; k<c->ncolors; k++) { 2508 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2509 for (l=0; l<c->nrows[k]; l++) { 2510 col = c->columnsforrow[k][l]; /* global column index */ 2511 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2512 } 2513 } 2514 ierr = PetscFree(gtol);CHKERRQ(ierr); 2515 } 2516 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2517 2518 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2519 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2520 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2521 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2522 CHKMEMQ; 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2528 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2529 { 2530 Mat B; 2531 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2532 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2533 Mat_SeqAIJ *b; 2534 PetscErrorCode ierr; 2535 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2536 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2537 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2538 2539 PetscFunctionBegin; 2540 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2541 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2542 2543 /* ---------------------------------------------------------------- 2544 Tell every processor the number of nonzeros per row 2545 */ 2546 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2547 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2548 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2549 } 2550 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2551 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2552 displs = recvcounts + size; 2553 for (i=0; i<size; i++) { 2554 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2555 displs[i] = A->rmap->range[i]/bs; 2556 } 2557 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2558 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2559 #else 2560 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2561 #endif 2562 /* --------------------------------------------------------------- 2563 Create the sequential matrix of the same type as the local block diagonal 2564 */ 2565 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2566 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2567 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2568 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2569 b = (Mat_SeqAIJ *)B->data; 2570 2571 /*-------------------------------------------------------------------- 2572 Copy my part of matrix column indices over 2573 */ 2574 sendcount = ad->nz + bd->nz; 2575 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2576 a_jsendbuf = ad->j; 2577 b_jsendbuf = bd->j; 2578 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2579 cnt = 0; 2580 for (i=0; i<n; i++) { 2581 2582 /* put in lower diagonal portion */ 2583 m = bd->i[i+1] - bd->i[i]; 2584 while (m > 0) { 2585 /* is it above diagonal (in bd (compressed) numbering) */ 2586 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2587 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2588 m--; 2589 } 2590 2591 /* put in diagonal portion */ 2592 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2593 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2594 } 2595 2596 /* put in upper diagonal portion */ 2597 while (m-- > 0) { 2598 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2599 } 2600 } 2601 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2602 2603 /*-------------------------------------------------------------------- 2604 Gather all column indices to all processors 2605 */ 2606 for (i=0; i<size; i++) { 2607 recvcounts[i] = 0; 2608 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2609 recvcounts[i] += lens[j]; 2610 } 2611 } 2612 displs[0] = 0; 2613 for (i=1; i<size; i++) { 2614 displs[i] = displs[i-1] + recvcounts[i-1]; 2615 } 2616 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2617 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2618 #else 2619 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2620 #endif 2621 /*-------------------------------------------------------------------- 2622 Assemble the matrix into useable form (note numerical values not yet set) 2623 */ 2624 /* set the b->ilen (length of each row) values */ 2625 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2626 /* set the b->i indices */ 2627 b->i[0] = 0; 2628 for (i=1; i<=A->rmap->N/bs; i++) { 2629 b->i[i] = b->i[i-1] + lens[i-1]; 2630 } 2631 ierr = PetscFree(lens);CHKERRQ(ierr); 2632 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2633 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2634 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2635 2636 if (A->symmetric){ 2637 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2638 } else if (A->hermitian) { 2639 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2640 } else if (A->structurally_symmetric) { 2641 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2642 } 2643 *newmat = B; 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatSOR_MPIBAIJ" 2649 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2650 { 2651 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2652 PetscErrorCode ierr; 2653 Vec bb1 = 0; 2654 2655 PetscFunctionBegin; 2656 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2657 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2658 } 2659 2660 if (flag == SOR_APPLY_UPPER) { 2661 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2662 PetscFunctionReturn(0); 2663 } 2664 2665 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2666 if (flag & SOR_ZERO_INITIAL_GUESS) { 2667 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2668 its--; 2669 } 2670 2671 while (its--) { 2672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2674 2675 /* update rhs: bb1 = bb - B*x */ 2676 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2677 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2678 2679 /* local sweep */ 2680 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2681 } 2682 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2683 if (flag & SOR_ZERO_INITIAL_GUESS) { 2684 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2685 its--; 2686 } 2687 while (its--) { 2688 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2689 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2690 2691 /* update rhs: bb1 = bb - B*x */ 2692 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2693 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2694 2695 /* local sweep */ 2696 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2697 } 2698 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2699 if (flag & SOR_ZERO_INITIAL_GUESS) { 2700 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2701 its--; 2702 } 2703 while (its--) { 2704 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2705 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2706 2707 /* update rhs: bb1 = bb - B*x */ 2708 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2709 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2710 2711 /* local sweep */ 2712 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2713 } 2714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2715 2716 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2717 PetscFunctionReturn(0); 2718 } 2719 2720 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2721 2722 2723 /* -------------------------------------------------------------------*/ 2724 static struct _MatOps MatOps_Values = { 2725 MatSetValues_MPIBAIJ, 2726 MatGetRow_MPIBAIJ, 2727 MatRestoreRow_MPIBAIJ, 2728 MatMult_MPIBAIJ, 2729 /* 4*/ MatMultAdd_MPIBAIJ, 2730 MatMultTranspose_MPIBAIJ, 2731 MatMultTransposeAdd_MPIBAIJ, 2732 0, 2733 0, 2734 0, 2735 /*10*/ 0, 2736 0, 2737 0, 2738 MatSOR_MPIBAIJ, 2739 MatTranspose_MPIBAIJ, 2740 /*15*/ MatGetInfo_MPIBAIJ, 2741 MatEqual_MPIBAIJ, 2742 MatGetDiagonal_MPIBAIJ, 2743 MatDiagonalScale_MPIBAIJ, 2744 MatNorm_MPIBAIJ, 2745 /*20*/ MatAssemblyBegin_MPIBAIJ, 2746 MatAssemblyEnd_MPIBAIJ, 2747 MatSetOption_MPIBAIJ, 2748 MatZeroEntries_MPIBAIJ, 2749 /*24*/ MatZeroRows_MPIBAIJ, 2750 0, 2751 0, 2752 0, 2753 0, 2754 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2755 0, 2756 0, 2757 0, 2758 0, 2759 /*34*/ MatDuplicate_MPIBAIJ, 2760 0, 2761 0, 2762 0, 2763 0, 2764 /*39*/ MatAXPY_MPIBAIJ, 2765 MatGetSubMatrices_MPIBAIJ, 2766 MatIncreaseOverlap_MPIBAIJ, 2767 MatGetValues_MPIBAIJ, 2768 MatCopy_MPIBAIJ, 2769 /*44*/ 0, 2770 MatScale_MPIBAIJ, 2771 0, 2772 0, 2773 0, 2774 /*49*/ MatSetBlockSize_MPIBAIJ, 2775 0, 2776 0, 2777 0, 2778 0, 2779 /*54*/ MatFDColoringCreate_MPIBAIJ, 2780 0, 2781 MatSetUnfactored_MPIBAIJ, 2782 MatPermute_MPIBAIJ, 2783 MatSetValuesBlocked_MPIBAIJ, 2784 /*59*/ MatGetSubMatrix_MPIBAIJ, 2785 MatDestroy_MPIBAIJ, 2786 MatView_MPIBAIJ, 2787 0, 2788 0, 2789 /*64*/ 0, 2790 0, 2791 0, 2792 0, 2793 0, 2794 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2795 0, 2796 0, 2797 0, 2798 0, 2799 /*74*/ 0, 2800 MatFDColoringApply_BAIJ, 2801 0, 2802 0, 2803 0, 2804 /*79*/ 0, 2805 0, 2806 0, 2807 0, 2808 MatLoad_MPIBAIJ, 2809 /*84*/ 0, 2810 0, 2811 0, 2812 0, 2813 0, 2814 /*89*/ 0, 2815 0, 2816 0, 2817 0, 2818 0, 2819 /*94*/ 0, 2820 0, 2821 0, 2822 0, 2823 0, 2824 /*99*/ 0, 2825 0, 2826 0, 2827 0, 2828 0, 2829 /*104*/0, 2830 MatRealPart_MPIBAIJ, 2831 MatImaginaryPart_MPIBAIJ, 2832 0, 2833 0, 2834 /*109*/0, 2835 0, 2836 0, 2837 0, 2838 0, 2839 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2840 0, 2841 MatGetGhosts_MPIBAIJ, 2842 0, 2843 0, 2844 /*119*/0, 2845 0, 2846 0, 2847 0 2848 }; 2849 2850 EXTERN_C_BEGIN 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2853 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2854 { 2855 PetscFunctionBegin; 2856 *a = ((Mat_MPIBAIJ *)A->data)->A; 2857 *iscopy = PETSC_FALSE; 2858 PetscFunctionReturn(0); 2859 } 2860 EXTERN_C_END 2861 2862 EXTERN_C_BEGIN 2863 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2864 EXTERN_C_END 2865 2866 EXTERN_C_BEGIN 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2869 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2870 { 2871 PetscInt m,rstart,cstart,cend; 2872 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2873 const PetscInt *JJ=0; 2874 PetscScalar *values=0; 2875 PetscErrorCode ierr; 2876 2877 PetscFunctionBegin; 2878 2879 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2880 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2881 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2882 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2883 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2884 m = B->rmap->n/bs; 2885 rstart = B->rmap->rstart/bs; 2886 cstart = B->cmap->rstart/bs; 2887 cend = B->cmap->rend/bs; 2888 2889 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2890 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2891 for (i=0; i<m; i++) { 2892 nz = ii[i+1] - ii[i]; 2893 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2894 nz_max = PetscMax(nz_max,nz); 2895 JJ = jj + ii[i]; 2896 for (j=0; j<nz; j++) { 2897 if (*JJ >= cstart) break; 2898 JJ++; 2899 } 2900 d = 0; 2901 for (; j<nz; j++) { 2902 if (*JJ++ >= cend) break; 2903 d++; 2904 } 2905 d_nnz[i] = d; 2906 o_nnz[i] = nz - d; 2907 } 2908 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2909 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2910 2911 values = (PetscScalar*)V; 2912 if (!values) { 2913 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2914 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2915 } 2916 for (i=0; i<m; i++) { 2917 PetscInt row = i + rstart; 2918 PetscInt ncols = ii[i+1] - ii[i]; 2919 const PetscInt *icols = jj + ii[i]; 2920 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2921 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2922 } 2923 2924 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2925 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2926 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2927 2928 PetscFunctionReturn(0); 2929 } 2930 EXTERN_C_END 2931 2932 #undef __FUNCT__ 2933 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2934 /*@C 2935 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2936 (the default parallel PETSc format). 2937 2938 Collective on MPI_Comm 2939 2940 Input Parameters: 2941 + A - the matrix 2942 . bs - the block size 2943 . i - the indices into j for the start of each local row (starts with zero) 2944 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2945 - v - optional values in the matrix 2946 2947 Level: developer 2948 2949 .keywords: matrix, aij, compressed row, sparse, parallel 2950 2951 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2952 @*/ 2953 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2954 { 2955 PetscErrorCode ierr; 2956 2957 PetscFunctionBegin; 2958 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2959 PetscFunctionReturn(0); 2960 } 2961 2962 EXTERN_C_BEGIN 2963 #undef __FUNCT__ 2964 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2965 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2966 { 2967 Mat_MPIBAIJ *b; 2968 PetscErrorCode ierr; 2969 PetscInt i, newbs = PetscAbs(bs); 2970 2971 PetscFunctionBegin; 2972 if (bs < 0) { 2973 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2974 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2975 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2976 bs = PetscAbs(bs); 2977 } 2978 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2979 bs = newbs; 2980 2981 2982 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2983 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2984 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2985 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2986 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2987 2988 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2989 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2990 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2991 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2992 2993 if (d_nnz) { 2994 for (i=0; i<B->rmap->n/bs; i++) { 2995 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2996 } 2997 } 2998 if (o_nnz) { 2999 for (i=0; i<B->rmap->n/bs; i++) { 3000 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3001 } 3002 } 3003 3004 b = (Mat_MPIBAIJ*)B->data; 3005 b->bs2 = bs*bs; 3006 b->mbs = B->rmap->n/bs; 3007 b->nbs = B->cmap->n/bs; 3008 b->Mbs = B->rmap->N/bs; 3009 b->Nbs = B->cmap->N/bs; 3010 3011 for (i=0; i<=b->size; i++) { 3012 b->rangebs[i] = B->rmap->range[i]/bs; 3013 } 3014 b->rstartbs = B->rmap->rstart/bs; 3015 b->rendbs = B->rmap->rend/bs; 3016 b->cstartbs = B->cmap->rstart/bs; 3017 b->cendbs = B->cmap->rend/bs; 3018 3019 if (!B->preallocated) { 3020 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3021 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3022 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3023 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3024 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3025 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3026 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3027 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3028 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3029 } 3030 3031 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3032 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3033 B->preallocated = PETSC_TRUE; 3034 PetscFunctionReturn(0); 3035 } 3036 EXTERN_C_END 3037 3038 EXTERN_C_BEGIN 3039 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3040 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3041 EXTERN_C_END 3042 3043 3044 EXTERN_C_BEGIN 3045 #undef __FUNCT__ 3046 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3047 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3048 { 3049 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3050 PetscErrorCode ierr; 3051 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3052 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3053 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3054 3055 PetscFunctionBegin; 3056 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3057 ii[0] = 0; 3058 CHKMEMQ; 3059 for (i=0; i<M; i++) { 3060 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3061 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3062 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3063 /* remove one from count of matrix has diagonal */ 3064 for (j=id[i]; j<id[i+1]; j++) { 3065 if (jd[j] == i) {ii[i+1]--;break;} 3066 } 3067 CHKMEMQ; 3068 } 3069 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3070 cnt = 0; 3071 for (i=0; i<M; i++) { 3072 for (j=io[i]; j<io[i+1]; j++) { 3073 if (garray[jo[j]] > rstart) break; 3074 jj[cnt++] = garray[jo[j]]; 3075 CHKMEMQ; 3076 } 3077 for (k=id[i]; k<id[i+1]; k++) { 3078 if (jd[k] != i) { 3079 jj[cnt++] = rstart + jd[k]; 3080 CHKMEMQ; 3081 } 3082 } 3083 for (;j<io[i+1]; j++) { 3084 jj[cnt++] = garray[jo[j]]; 3085 CHKMEMQ; 3086 } 3087 } 3088 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3089 PetscFunctionReturn(0); 3090 } 3091 EXTERN_C_END 3092 3093 EXTERN_C_BEGIN 3094 #if defined(PETSC_HAVE_MUMPS) 3095 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3096 #endif 3097 EXTERN_C_END 3098 3099 /*MC 3100 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3101 3102 Options Database Keys: 3103 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3104 . -mat_block_size <bs> - set the blocksize used to store the matrix 3105 - -mat_use_hash_table <fact> 3106 3107 Level: beginner 3108 3109 .seealso: MatCreateMPIBAIJ 3110 M*/ 3111 3112 EXTERN_C_BEGIN 3113 #undef __FUNCT__ 3114 #define __FUNCT__ "MatCreate_MPIBAIJ" 3115 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 3116 { 3117 Mat_MPIBAIJ *b; 3118 PetscErrorCode ierr; 3119 PetscBool flg; 3120 3121 PetscFunctionBegin; 3122 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3123 B->data = (void*)b; 3124 3125 3126 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3127 B->mapping = 0; 3128 B->assembled = PETSC_FALSE; 3129 3130 B->insertmode = NOT_SET_VALUES; 3131 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3132 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3133 3134 /* build local table of row and column ownerships */ 3135 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3136 3137 /* build cache for off array entries formed */ 3138 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3139 b->donotstash = PETSC_FALSE; 3140 b->colmap = PETSC_NULL; 3141 b->garray = PETSC_NULL; 3142 b->roworiented = PETSC_TRUE; 3143 3144 /* stuff used in block assembly */ 3145 b->barray = 0; 3146 3147 /* stuff used for matrix vector multiply */ 3148 b->lvec = 0; 3149 b->Mvctx = 0; 3150 3151 /* stuff for MatGetRow() */ 3152 b->rowindices = 0; 3153 b->rowvalues = 0; 3154 b->getrowactive = PETSC_FALSE; 3155 3156 /* hash table stuff */ 3157 b->ht = 0; 3158 b->hd = 0; 3159 b->ht_size = 0; 3160 b->ht_flag = PETSC_FALSE; 3161 b->ht_fact = 0; 3162 b->ht_total_ct = 0; 3163 b->ht_insert_ct = 0; 3164 3165 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3166 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3167 if (flg) { 3168 PetscReal fact = 1.39; 3169 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3170 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3171 if (fact <= 1.0) fact = 1.39; 3172 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3173 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3174 } 3175 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3176 3177 #if defined(PETSC_HAVE_MUMPS) 3178 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3179 #endif 3180 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3181 "MatConvert_MPIBAIJ_MPIAdj", 3182 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3183 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3184 "MatStoreValues_MPIBAIJ", 3185 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3186 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3187 "MatRetrieveValues_MPIBAIJ", 3188 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3189 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3190 "MatGetDiagonalBlock_MPIBAIJ", 3191 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3192 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3193 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3194 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3195 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3196 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3197 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3198 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3199 "MatDiagonalScaleLocal_MPIBAIJ", 3200 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3201 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3202 "MatSetHashTableFactor_MPIBAIJ", 3203 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3204 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3205 PetscFunctionReturn(0); 3206 } 3207 EXTERN_C_END 3208 3209 /*MC 3210 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3211 3212 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3213 and MATMPIBAIJ otherwise. 3214 3215 Options Database Keys: 3216 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3217 3218 Level: beginner 3219 3220 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3221 M*/ 3222 3223 EXTERN_C_BEGIN 3224 #undef __FUNCT__ 3225 #define __FUNCT__ "MatCreate_BAIJ" 3226 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3227 { 3228 PetscErrorCode ierr; 3229 PetscMPIInt size; 3230 3231 PetscFunctionBegin; 3232 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3233 if (size == 1) { 3234 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3235 } else { 3236 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3237 } 3238 PetscFunctionReturn(0); 3239 } 3240 EXTERN_C_END 3241 3242 #undef __FUNCT__ 3243 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3244 /*@C 3245 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3246 (block compressed row). For good matrix assembly performance 3247 the user should preallocate the matrix storage by setting the parameters 3248 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3249 performance can be increased by more than a factor of 50. 3250 3251 Collective on Mat 3252 3253 Input Parameters: 3254 + A - the matrix 3255 . bs - size of blockk 3256 . d_nz - number of block nonzeros per block row in diagonal portion of local 3257 submatrix (same for all local rows) 3258 . d_nnz - array containing the number of block nonzeros in the various block rows 3259 of the in diagonal portion of the local (possibly different for each block 3260 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3261 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3262 submatrix (same for all local rows). 3263 - o_nnz - array containing the number of nonzeros in the various block rows of the 3264 off-diagonal portion of the local submatrix (possibly different for 3265 each block row) or PETSC_NULL. 3266 3267 If the *_nnz parameter is given then the *_nz parameter is ignored 3268 3269 Options Database Keys: 3270 + -mat_block_size - size of the blocks to use 3271 - -mat_use_hash_table <fact> 3272 3273 Notes: 3274 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3275 than it must be used on all processors that share the object for that argument. 3276 3277 Storage Information: 3278 For a square global matrix we define each processor's diagonal portion 3279 to be its local rows and the corresponding columns (a square submatrix); 3280 each processor's off-diagonal portion encompasses the remainder of the 3281 local matrix (a rectangular submatrix). 3282 3283 The user can specify preallocated storage for the diagonal part of 3284 the local submatrix with either d_nz or d_nnz (not both). Set 3285 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3286 memory allocation. Likewise, specify preallocated storage for the 3287 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3288 3289 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3290 the figure below we depict these three local rows and all columns (0-11). 3291 3292 .vb 3293 0 1 2 3 4 5 6 7 8 9 10 11 3294 ------------------- 3295 row 3 | o o o d d d o o o o o o 3296 row 4 | o o o d d d o o o o o o 3297 row 5 | o o o d d d o o o o o o 3298 ------------------- 3299 .ve 3300 3301 Thus, any entries in the d locations are stored in the d (diagonal) 3302 submatrix, and any entries in the o locations are stored in the 3303 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3304 stored simply in the MATSEQBAIJ format for compressed row storage. 3305 3306 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3307 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3308 In general, for PDE problems in which most nonzeros are near the diagonal, 3309 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3310 or you will get TERRIBLE performance; see the users' manual chapter on 3311 matrices. 3312 3313 You can call MatGetInfo() to get information on how effective the preallocation was; 3314 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3315 You can also run with the option -info and look for messages with the string 3316 malloc in them to see if additional memory allocation was needed. 3317 3318 Level: intermediate 3319 3320 .keywords: matrix, block, aij, compressed row, sparse, parallel 3321 3322 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3323 @*/ 3324 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3325 { 3326 PetscErrorCode ierr; 3327 3328 PetscFunctionBegin; 3329 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3330 PetscFunctionReturn(0); 3331 } 3332 3333 #undef __FUNCT__ 3334 #define __FUNCT__ "MatCreateMPIBAIJ" 3335 /*@C 3336 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3337 (block compressed row). For good matrix assembly performance 3338 the user should preallocate the matrix storage by setting the parameters 3339 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3340 performance can be increased by more than a factor of 50. 3341 3342 Collective on MPI_Comm 3343 3344 Input Parameters: 3345 + comm - MPI communicator 3346 . bs - size of blockk 3347 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3348 This value should be the same as the local size used in creating the 3349 y vector for the matrix-vector product y = Ax. 3350 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3351 This value should be the same as the local size used in creating the 3352 x vector for the matrix-vector product y = Ax. 3353 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3354 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3355 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3356 submatrix (same for all local rows) 3357 . d_nnz - array containing the number of nonzero blocks in the various block rows 3358 of the in diagonal portion of the local (possibly different for each block 3359 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3360 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3361 submatrix (same for all local rows). 3362 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3363 off-diagonal portion of the local submatrix (possibly different for 3364 each block row) or PETSC_NULL. 3365 3366 Output Parameter: 3367 . A - the matrix 3368 3369 Options Database Keys: 3370 + -mat_block_size - size of the blocks to use 3371 - -mat_use_hash_table <fact> 3372 3373 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3374 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3375 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3376 3377 Notes: 3378 If the *_nnz parameter is given then the *_nz parameter is ignored 3379 3380 A nonzero block is any block that as 1 or more nonzeros in it 3381 3382 The user MUST specify either the local or global matrix dimensions 3383 (possibly both). 3384 3385 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3386 than it must be used on all processors that share the object for that argument. 3387 3388 Storage Information: 3389 For a square global matrix we define each processor's diagonal portion 3390 to be its local rows and the corresponding columns (a square submatrix); 3391 each processor's off-diagonal portion encompasses the remainder of the 3392 local matrix (a rectangular submatrix). 3393 3394 The user can specify preallocated storage for the diagonal part of 3395 the local submatrix with either d_nz or d_nnz (not both). Set 3396 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3397 memory allocation. Likewise, specify preallocated storage for the 3398 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3399 3400 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3401 the figure below we depict these three local rows and all columns (0-11). 3402 3403 .vb 3404 0 1 2 3 4 5 6 7 8 9 10 11 3405 ------------------- 3406 row 3 | o o o d d d o o o o o o 3407 row 4 | o o o d d d o o o o o o 3408 row 5 | o o o d d d o o o o o o 3409 ------------------- 3410 .ve 3411 3412 Thus, any entries in the d locations are stored in the d (diagonal) 3413 submatrix, and any entries in the o locations are stored in the 3414 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3415 stored simply in the MATSEQBAIJ format for compressed row storage. 3416 3417 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3418 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3419 In general, for PDE problems in which most nonzeros are near the diagonal, 3420 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3421 or you will get TERRIBLE performance; see the users' manual chapter on 3422 matrices. 3423 3424 Level: intermediate 3425 3426 .keywords: matrix, block, aij, compressed row, sparse, parallel 3427 3428 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3429 @*/ 3430 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3431 { 3432 PetscErrorCode ierr; 3433 PetscMPIInt size; 3434 3435 PetscFunctionBegin; 3436 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3437 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3438 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3439 if (size > 1) { 3440 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3441 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3442 } else { 3443 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3444 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3445 } 3446 PetscFunctionReturn(0); 3447 } 3448 3449 #undef __FUNCT__ 3450 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3451 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3452 { 3453 Mat mat; 3454 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3455 PetscErrorCode ierr; 3456 PetscInt len=0; 3457 3458 PetscFunctionBegin; 3459 *newmat = 0; 3460 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3461 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3462 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3463 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3464 3465 mat->factortype = matin->factortype; 3466 mat->preallocated = PETSC_TRUE; 3467 mat->assembled = PETSC_TRUE; 3468 mat->insertmode = NOT_SET_VALUES; 3469 3470 a = (Mat_MPIBAIJ*)mat->data; 3471 mat->rmap->bs = matin->rmap->bs; 3472 a->bs2 = oldmat->bs2; 3473 a->mbs = oldmat->mbs; 3474 a->nbs = oldmat->nbs; 3475 a->Mbs = oldmat->Mbs; 3476 a->Nbs = oldmat->Nbs; 3477 3478 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3479 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3480 3481 a->size = oldmat->size; 3482 a->rank = oldmat->rank; 3483 a->donotstash = oldmat->donotstash; 3484 a->roworiented = oldmat->roworiented; 3485 a->rowindices = 0; 3486 a->rowvalues = 0; 3487 a->getrowactive = PETSC_FALSE; 3488 a->barray = 0; 3489 a->rstartbs = oldmat->rstartbs; 3490 a->rendbs = oldmat->rendbs; 3491 a->cstartbs = oldmat->cstartbs; 3492 a->cendbs = oldmat->cendbs; 3493 3494 /* hash table stuff */ 3495 a->ht = 0; 3496 a->hd = 0; 3497 a->ht_size = 0; 3498 a->ht_flag = oldmat->ht_flag; 3499 a->ht_fact = oldmat->ht_fact; 3500 a->ht_total_ct = 0; 3501 a->ht_insert_ct = 0; 3502 3503 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3504 if (oldmat->colmap) { 3505 #if defined (PETSC_USE_CTABLE) 3506 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3507 #else 3508 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3509 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3510 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3511 #endif 3512 } else a->colmap = 0; 3513 3514 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3515 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3516 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3517 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3518 } else a->garray = 0; 3519 3520 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3521 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3522 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3523 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3524 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3525 3526 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3527 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3528 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3529 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3530 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3531 *newmat = mat; 3532 3533 PetscFunctionReturn(0); 3534 } 3535 3536 #undef __FUNCT__ 3537 #define __FUNCT__ "MatLoad_MPIBAIJ" 3538 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3539 { 3540 PetscErrorCode ierr; 3541 int fd; 3542 PetscInt i,nz,j,rstart,rend; 3543 PetscScalar *vals,*buf; 3544 MPI_Comm comm = ((PetscObject)viewer)->comm; 3545 MPI_Status status; 3546 PetscMPIInt rank,size,maxnz; 3547 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3548 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3549 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3550 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3551 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3552 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3553 3554 PetscFunctionBegin; 3555 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3556 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3557 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3558 3559 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3560 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3561 if (!rank) { 3562 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3563 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3564 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3565 } 3566 3567 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3568 3569 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3570 M = header[1]; N = header[2]; 3571 3572 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3573 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3574 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3575 3576 /* If global sizes are set, check if they are consistent with that given in the file */ 3577 if (sizesset) { 3578 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3579 } 3580 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3581 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3582 3583 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3584 3585 /* 3586 This code adds extra rows to make sure the number of rows is 3587 divisible by the blocksize 3588 */ 3589 Mbs = M/bs; 3590 extra_rows = bs - M + bs*Mbs; 3591 if (extra_rows == bs) extra_rows = 0; 3592 else Mbs++; 3593 if (extra_rows && !rank) { 3594 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3595 } 3596 3597 /* determine ownership of all rows */ 3598 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3599 mbs = Mbs/size + ((Mbs % size) > rank); 3600 m = mbs*bs; 3601 } else { /* User set */ 3602 m = newmat->rmap->n; 3603 mbs = m/bs; 3604 } 3605 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3606 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3607 3608 /* process 0 needs enough room for process with most rows */ 3609 if (!rank) { 3610 mmax = rowners[1]; 3611 for (i=2; i<size; i++) { 3612 mmax = PetscMax(mmax,rowners[i]); 3613 } 3614 mmax*=bs; 3615 } else mmax = m; 3616 3617 rowners[0] = 0; 3618 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3619 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3620 rstart = rowners[rank]; 3621 rend = rowners[rank+1]; 3622 3623 /* distribute row lengths to all processors */ 3624 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3625 if (!rank) { 3626 mend = m; 3627 if (size == 1) mend = mend - extra_rows; 3628 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3629 for (j=mend; j<m; j++) locrowlens[j] = 1; 3630 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3631 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3632 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3633 for (j=0; j<m; j++) { 3634 procsnz[0] += locrowlens[j]; 3635 } 3636 for (i=1; i<size; i++) { 3637 mend = browners[i+1] - browners[i]; 3638 if (i == size-1) mend = mend - extra_rows; 3639 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3640 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3641 /* calculate the number of nonzeros on each processor */ 3642 for (j=0; j<browners[i+1]-browners[i]; j++) { 3643 procsnz[i] += rowlengths[j]; 3644 } 3645 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3646 } 3647 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3648 } else { 3649 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3650 } 3651 3652 if (!rank) { 3653 /* determine max buffer needed and allocate it */ 3654 maxnz = procsnz[0]; 3655 for (i=1; i<size; i++) { 3656 maxnz = PetscMax(maxnz,procsnz[i]); 3657 } 3658 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3659 3660 /* read in my part of the matrix column indices */ 3661 nz = procsnz[0]; 3662 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3663 mycols = ibuf; 3664 if (size == 1) nz -= extra_rows; 3665 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3666 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3667 3668 /* read in every ones (except the last) and ship off */ 3669 for (i=1; i<size-1; i++) { 3670 nz = procsnz[i]; 3671 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3672 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3673 } 3674 /* read in the stuff for the last proc */ 3675 if (size != 1) { 3676 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3677 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3678 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3679 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3680 } 3681 ierr = PetscFree(cols);CHKERRQ(ierr); 3682 } else { 3683 /* determine buffer space needed for message */ 3684 nz = 0; 3685 for (i=0; i<m; i++) { 3686 nz += locrowlens[i]; 3687 } 3688 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3689 mycols = ibuf; 3690 /* receive message of column indices*/ 3691 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3692 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3693 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3694 } 3695 3696 /* loop over local rows, determining number of off diagonal entries */ 3697 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3698 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3699 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3700 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3701 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3702 rowcount = 0; nzcount = 0; 3703 for (i=0; i<mbs; i++) { 3704 dcount = 0; 3705 odcount = 0; 3706 for (j=0; j<bs; j++) { 3707 kmax = locrowlens[rowcount]; 3708 for (k=0; k<kmax; k++) { 3709 tmp = mycols[nzcount++]/bs; 3710 if (!mask[tmp]) { 3711 mask[tmp] = 1; 3712 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3713 else masked1[dcount++] = tmp; 3714 } 3715 } 3716 rowcount++; 3717 } 3718 3719 dlens[i] = dcount; 3720 odlens[i] = odcount; 3721 3722 /* zero out the mask elements we set */ 3723 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3724 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3725 } 3726 3727 3728 if (!sizesset) { 3729 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3730 } 3731 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3732 3733 if (!rank) { 3734 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3735 /* read in my part of the matrix numerical values */ 3736 nz = procsnz[0]; 3737 vals = buf; 3738 mycols = ibuf; 3739 if (size == 1) nz -= extra_rows; 3740 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3741 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3742 3743 /* insert into matrix */ 3744 jj = rstart*bs; 3745 for (i=0; i<m; i++) { 3746 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3747 mycols += locrowlens[i]; 3748 vals += locrowlens[i]; 3749 jj++; 3750 } 3751 /* read in other processors (except the last one) and ship out */ 3752 for (i=1; i<size-1; i++) { 3753 nz = procsnz[i]; 3754 vals = buf; 3755 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3756 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3757 } 3758 /* the last proc */ 3759 if (size != 1){ 3760 nz = procsnz[i] - extra_rows; 3761 vals = buf; 3762 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3763 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3764 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3765 } 3766 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3767 } else { 3768 /* receive numeric values */ 3769 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3770 3771 /* receive message of values*/ 3772 vals = buf; 3773 mycols = ibuf; 3774 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3775 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3776 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3777 3778 /* insert into matrix */ 3779 jj = rstart*bs; 3780 for (i=0; i<m; i++) { 3781 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3782 mycols += locrowlens[i]; 3783 vals += locrowlens[i]; 3784 jj++; 3785 } 3786 } 3787 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3788 ierr = PetscFree(buf);CHKERRQ(ierr); 3789 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3790 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3791 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3792 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3793 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3794 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3795 3796 PetscFunctionReturn(0); 3797 } 3798 3799 #undef __FUNCT__ 3800 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3801 /*@ 3802 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3803 3804 Input Parameters: 3805 . mat - the matrix 3806 . fact - factor 3807 3808 Not Collective, each process can use a different factor 3809 3810 Level: advanced 3811 3812 Notes: 3813 This can also be set by the command line option: -mat_use_hash_table <fact> 3814 3815 .keywords: matrix, hashtable, factor, HT 3816 3817 .seealso: MatSetOption() 3818 @*/ 3819 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3820 { 3821 PetscErrorCode ierr; 3822 3823 PetscFunctionBegin; 3824 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3825 PetscFunctionReturn(0); 3826 } 3827 3828 EXTERN_C_BEGIN 3829 #undef __FUNCT__ 3830 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3831 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3832 { 3833 Mat_MPIBAIJ *baij; 3834 3835 PetscFunctionBegin; 3836 baij = (Mat_MPIBAIJ*)mat->data; 3837 baij->ht_fact = fact; 3838 PetscFunctionReturn(0); 3839 } 3840 EXTERN_C_END 3841 3842 #undef __FUNCT__ 3843 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3844 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3845 { 3846 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3847 PetscFunctionBegin; 3848 *Ad = a->A; 3849 *Ao = a->B; 3850 *colmap = a->garray; 3851 PetscFunctionReturn(0); 3852 } 3853 3854 /* 3855 Special version for direct calls from Fortran (to eliminate two function call overheads 3856 */ 3857 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3858 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3859 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3860 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3861 #endif 3862 3863 #undef __FUNCT__ 3864 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3865 /*@C 3866 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3867 3868 Collective on Mat 3869 3870 Input Parameters: 3871 + mat - the matrix 3872 . min - number of input rows 3873 . im - input rows 3874 . nin - number of input columns 3875 . in - input columns 3876 . v - numerical values input 3877 - addvin - INSERT_VALUES or ADD_VALUES 3878 3879 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3880 3881 Level: advanced 3882 3883 .seealso: MatSetValuesBlocked() 3884 @*/ 3885 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3886 { 3887 /* convert input arguments to C version */ 3888 Mat mat = *matin; 3889 PetscInt m = *min, n = *nin; 3890 InsertMode addv = *addvin; 3891 3892 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3893 const MatScalar *value; 3894 MatScalar *barray=baij->barray; 3895 PetscBool roworiented = baij->roworiented; 3896 PetscErrorCode ierr; 3897 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3898 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3899 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3900 3901 PetscFunctionBegin; 3902 /* tasks normally handled by MatSetValuesBlocked() */ 3903 if (mat->insertmode == NOT_SET_VALUES) { 3904 mat->insertmode = addv; 3905 } 3906 #if defined(PETSC_USE_DEBUG) 3907 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3908 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3909 #endif 3910 if (mat->assembled) { 3911 mat->was_assembled = PETSC_TRUE; 3912 mat->assembled = PETSC_FALSE; 3913 } 3914 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3915 3916 3917 if(!barray) { 3918 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3919 baij->barray = barray; 3920 } 3921 3922 if (roworiented) { 3923 stepval = (n-1)*bs; 3924 } else { 3925 stepval = (m-1)*bs; 3926 } 3927 for (i=0; i<m; i++) { 3928 if (im[i] < 0) continue; 3929 #if defined(PETSC_USE_DEBUG) 3930 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3931 #endif 3932 if (im[i] >= rstart && im[i] < rend) { 3933 row = im[i] - rstart; 3934 for (j=0; j<n; j++) { 3935 /* If NumCol = 1 then a copy is not required */ 3936 if ((roworiented) && (n == 1)) { 3937 barray = (MatScalar*)v + i*bs2; 3938 } else if((!roworiented) && (m == 1)) { 3939 barray = (MatScalar*)v + j*bs2; 3940 } else { /* Here a copy is required */ 3941 if (roworiented) { 3942 value = v + i*(stepval+bs)*bs + j*bs; 3943 } else { 3944 value = v + j*(stepval+bs)*bs + i*bs; 3945 } 3946 for (ii=0; ii<bs; ii++,value+=stepval) { 3947 for (jj=0; jj<bs; jj++) { 3948 *barray++ = *value++; 3949 } 3950 } 3951 barray -=bs2; 3952 } 3953 3954 if (in[j] >= cstart && in[j] < cend){ 3955 col = in[j] - cstart; 3956 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3957 } 3958 else if (in[j] < 0) continue; 3959 #if defined(PETSC_USE_DEBUG) 3960 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3961 #endif 3962 else { 3963 if (mat->was_assembled) { 3964 if (!baij->colmap) { 3965 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3966 } 3967 3968 #if defined(PETSC_USE_DEBUG) 3969 #if defined (PETSC_USE_CTABLE) 3970 { PetscInt data; 3971 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3972 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3973 } 3974 #else 3975 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3976 #endif 3977 #endif 3978 #if defined (PETSC_USE_CTABLE) 3979 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3980 col = (col - 1)/bs; 3981 #else 3982 col = (baij->colmap[in[j]] - 1)/bs; 3983 #endif 3984 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3985 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3986 col = in[j]; 3987 } 3988 } 3989 else col = in[j]; 3990 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3991 } 3992 } 3993 } else { 3994 if (!baij->donotstash) { 3995 if (roworiented) { 3996 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3997 } else { 3998 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3999 } 4000 } 4001 } 4002 } 4003 4004 /* task normally handled by MatSetValuesBlocked() */ 4005 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4006 PetscFunctionReturn(0); 4007 } 4008 4009 #undef __FUNCT__ 4010 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4011 /*@ 4012 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4013 CSR format the local rows. 4014 4015 Collective on MPI_Comm 4016 4017 Input Parameters: 4018 + comm - MPI communicator 4019 . bs - the block size, only a block size of 1 is supported 4020 . m - number of local rows (Cannot be PETSC_DECIDE) 4021 . n - This value should be the same as the local size used in creating the 4022 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4023 calculated if N is given) For square matrices n is almost always m. 4024 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4025 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4026 . i - row indices 4027 . j - column indices 4028 - a - matrix values 4029 4030 Output Parameter: 4031 . mat - the matrix 4032 4033 Level: intermediate 4034 4035 Notes: 4036 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4037 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4038 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4039 4040 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4041 4042 .keywords: matrix, aij, compressed row, sparse, parallel 4043 4044 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4045 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4046 @*/ 4047 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4048 { 4049 PetscErrorCode ierr; 4050 4051 4052 PetscFunctionBegin; 4053 if (i[0]) { 4054 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4055 } 4056 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4057 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4058 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4059 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4060 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4061 PetscFunctionReturn(0); 4062 } 4063